sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5import itertools 6from collections import defaultdict 7 8from sqlglot import exp 9from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 10from sqlglot.helper import apply_index_offset, ensure_list, seq_get 11from sqlglot.time import format_time 12from sqlglot.tokens import Token, Tokenizer, TokenType 13from sqlglot.trie import TrieResult, in_trie, new_trie 14 15if t.TYPE_CHECKING: 16 from sqlglot._typing import E, Lit 17 from sqlglot.dialects.dialect import Dialect, DialectType 18 19 T = t.TypeVar("T") 20 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) 21 22logger = logging.getLogger("sqlglot") 23 24OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 25 26 27def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 28 if len(args) == 1 and args[0].is_star: 29 return exp.StarMap(this=args[0]) 30 31 keys = [] 32 values = [] 33 for i in range(0, len(args), 2): 34 keys.append(args[i]) 35 values.append(args[i + 1]) 36 37 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 38 39 40def build_like(args: t.List) -> exp.Escape | exp.Like: 41 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 42 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 43 44 45def binary_range_parser( 46 expr_type: t.Type[exp.Expression], reverse_args: bool = False 47) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 48 def _parse_binary_range( 49 self: Parser, this: t.Optional[exp.Expression] 50 ) -> t.Optional[exp.Expression]: 51 expression = self._parse_bitwise() 52 if reverse_args: 53 this, expression = expression, this 54 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 55 56 return _parse_binary_range 57 58 59def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 60 # Default argument order is 
base, expression 61 this = seq_get(args, 0) 62 expression = seq_get(args, 1) 63 64 if expression: 65 if not dialect.LOG_BASE_FIRST: 66 this, expression = expression, this 67 return exp.Log(this=this, expression=expression) 68 69 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 70 71 72def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 73 arg = seq_get(args, 0) 74 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 75 76 77def build_lower(args: t.List) -> exp.Lower | exp.Hex: 78 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 79 arg = seq_get(args, 0) 80 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 81 82 83def build_upper(args: t.List) -> exp.Upper | exp.Hex: 84 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 85 arg = seq_get(args, 0) 86 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 87 88 89def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 90 def _builder(args: t.List, dialect: Dialect) -> E: 91 expression = expr_type( 92 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 93 ) 94 if len(args) > 2 and expr_type is exp.JSONExtract: 95 expression.set("expressions", args[2:]) 96 97 return expression 98 99 return _builder 100 101 102def build_mod(args: t.List) -> exp.Mod: 103 this = seq_get(args, 0) 104 expression = seq_get(args, 1) 105 106 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 107 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 108 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 109 110 return exp.Mod(this=this, expression=expression) 111 112 113def build_pad(args: t.List, is_left: bool = True): 114 return exp.Pad( 115 this=seq_get(args, 0), 116 expression=seq_get(args, 1), 117 fill_pattern=seq_get(args, 2), 118 is_left=is_left, 119 ) 120 121 122def build_array_constructor( 123 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 124) -> exp.Expression: 125 array_exp = exp_class(expressions=args) 126 127 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 128 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 129 130 return array_exp 131 132 133def build_convert_timezone( 134 args: t.List, default_source_tz: t.Optional[str] = None 135) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 136 if len(args) == 2: 137 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 138 return exp.ConvertTimezone( 139 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 140 ) 141 142 return exp.ConvertTimezone.from_arg_list(args) 143 144 145def build_trim(args: t.List, is_left: bool = True): 146 return exp.Trim( 147 this=seq_get(args, 0), 148 expression=seq_get(args, 1), 149 position="LEADING" if is_left else "TRAILING", 150 ) 151 152 153def build_coalesce( 154 args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None 155) -> exp.Coalesce: 156 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null) 157 158 159def build_locate_strposition(args: t.List): 160 return exp.StrPosition( 161 this=seq_get(args, 1), 162 substr=seq_get(args, 0), 163 position=seq_get(args, 2), 164 ) 165 166 167class _Parser(type): 168 def __new__(cls, clsname, bases, attrs): 169 klass = super().__new__(cls, clsname, 
bases, attrs) 170 171 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 172 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 173 174 return klass 175 176 177class Parser(metaclass=_Parser): 178 """ 179 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 180 181 Args: 182 error_level: The desired error level. 183 Default: ErrorLevel.IMMEDIATE 184 error_message_context: The amount of context to capture from a query string when displaying 185 the error message (in number of characters). 186 Default: 100 187 max_errors: Maximum number of error messages to include in a raised ParseError. 188 This is only relevant if error_level is ErrorLevel.RAISE. 189 Default: 3 190 """ 191 192 FUNCTIONS: t.Dict[str, t.Callable] = { 193 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 194 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 195 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 196 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 197 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 198 ), 199 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 200 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 201 ), 202 "CHAR": lambda args: exp.Chr(expressions=args), 203 "CHR": lambda args: exp.Chr(expressions=args), 204 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 205 "CONCAT": lambda args, dialect: exp.Concat( 206 expressions=args, 207 safe=not dialect.STRICT_STRING_CONCAT, 208 coalesce=dialect.CONCAT_COALESCE, 209 ), 210 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONVERT_TIMEZONE": build_convert_timezone, 216 "DATE_TO_DATE_STR": lambda args: exp.Cast( 217 this=seq_get(args, 0), 218 
to=exp.DataType(this=exp.DataType.Type.TEXT), 219 ), 220 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 221 start=seq_get(args, 0), 222 end=seq_get(args, 1), 223 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 224 ), 225 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 226 "HEX": build_hex, 227 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 228 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 229 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 230 "LIKE": build_like, 231 "LOG": build_logarithm, 232 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 233 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 234 "LOWER": build_lower, 235 "LPAD": lambda args: build_pad(args), 236 "LEFTPAD": lambda args: build_pad(args), 237 "LTRIM": lambda args: build_trim(args), 238 "MOD": build_mod, 239 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 240 "RPAD": lambda args: build_pad(args, is_left=False), 241 "RTRIM": lambda args: build_trim(args, is_left=False), 242 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 243 if len(args) != 2 244 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 245 "STRPOS": exp.StrPosition.from_arg_list, 246 "CHARINDEX": lambda args: build_locate_strposition(args), 247 "INSTR": exp.StrPosition.from_arg_list, 248 "LOCATE": lambda args: build_locate_strposition(args), 249 "TIME_TO_TIME_STR": lambda args: exp.Cast( 250 this=seq_get(args, 0), 251 to=exp.DataType(this=exp.DataType.Type.TEXT), 252 ), 253 "TO_HEX": build_hex, 254 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 255 this=exp.Cast( 256 this=seq_get(args, 0), 257 to=exp.DataType(this=exp.DataType.Type.TEXT), 258 ), 259 start=exp.Literal.number(1), 260 length=exp.Literal.number(10), 261 ), 262 
"UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 263 "UPPER": build_upper, 264 "VAR_MAP": build_var_map, 265 } 266 267 NO_PAREN_FUNCTIONS = { 268 TokenType.CURRENT_DATE: exp.CurrentDate, 269 TokenType.CURRENT_DATETIME: exp.CurrentDate, 270 TokenType.CURRENT_TIME: exp.CurrentTime, 271 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 272 TokenType.CURRENT_USER: exp.CurrentUser, 273 } 274 275 STRUCT_TYPE_TOKENS = { 276 TokenType.NESTED, 277 TokenType.OBJECT, 278 TokenType.STRUCT, 279 TokenType.UNION, 280 } 281 282 NESTED_TYPE_TOKENS = { 283 TokenType.ARRAY, 284 TokenType.LIST, 285 TokenType.LOWCARDINALITY, 286 TokenType.MAP, 287 TokenType.NULLABLE, 288 TokenType.RANGE, 289 *STRUCT_TYPE_TOKENS, 290 } 291 292 ENUM_TYPE_TOKENS = { 293 TokenType.DYNAMIC, 294 TokenType.ENUM, 295 TokenType.ENUM8, 296 TokenType.ENUM16, 297 } 298 299 AGGREGATE_TYPE_TOKENS = { 300 TokenType.AGGREGATEFUNCTION, 301 TokenType.SIMPLEAGGREGATEFUNCTION, 302 } 303 304 TYPE_TOKENS = { 305 TokenType.BIT, 306 TokenType.BOOLEAN, 307 TokenType.TINYINT, 308 TokenType.UTINYINT, 309 TokenType.SMALLINT, 310 TokenType.USMALLINT, 311 TokenType.INT, 312 TokenType.UINT, 313 TokenType.BIGINT, 314 TokenType.UBIGINT, 315 TokenType.INT128, 316 TokenType.UINT128, 317 TokenType.INT256, 318 TokenType.UINT256, 319 TokenType.MEDIUMINT, 320 TokenType.UMEDIUMINT, 321 TokenType.FIXEDSTRING, 322 TokenType.FLOAT, 323 TokenType.DOUBLE, 324 TokenType.UDOUBLE, 325 TokenType.CHAR, 326 TokenType.NCHAR, 327 TokenType.VARCHAR, 328 TokenType.NVARCHAR, 329 TokenType.BPCHAR, 330 TokenType.TEXT, 331 TokenType.MEDIUMTEXT, 332 TokenType.LONGTEXT, 333 TokenType.BLOB, 334 TokenType.MEDIUMBLOB, 335 TokenType.LONGBLOB, 336 TokenType.BINARY, 337 TokenType.VARBINARY, 338 TokenType.JSON, 339 TokenType.JSONB, 340 TokenType.INTERVAL, 341 TokenType.TINYBLOB, 342 TokenType.TINYTEXT, 343 TokenType.TIME, 344 TokenType.TIMETZ, 345 TokenType.TIMESTAMP, 346 TokenType.TIMESTAMP_S, 347 TokenType.TIMESTAMP_MS, 348 
TokenType.TIMESTAMP_NS, 349 TokenType.TIMESTAMPTZ, 350 TokenType.TIMESTAMPLTZ, 351 TokenType.TIMESTAMPNTZ, 352 TokenType.DATETIME, 353 TokenType.DATETIME2, 354 TokenType.DATETIME64, 355 TokenType.SMALLDATETIME, 356 TokenType.DATE, 357 TokenType.DATE32, 358 TokenType.INT4RANGE, 359 TokenType.INT4MULTIRANGE, 360 TokenType.INT8RANGE, 361 TokenType.INT8MULTIRANGE, 362 TokenType.NUMRANGE, 363 TokenType.NUMMULTIRANGE, 364 TokenType.TSRANGE, 365 TokenType.TSMULTIRANGE, 366 TokenType.TSTZRANGE, 367 TokenType.TSTZMULTIRANGE, 368 TokenType.DATERANGE, 369 TokenType.DATEMULTIRANGE, 370 TokenType.DECIMAL, 371 TokenType.DECIMAL32, 372 TokenType.DECIMAL64, 373 TokenType.DECIMAL128, 374 TokenType.DECIMAL256, 375 TokenType.UDECIMAL, 376 TokenType.BIGDECIMAL, 377 TokenType.UUID, 378 TokenType.GEOGRAPHY, 379 TokenType.GEOMETRY, 380 TokenType.POINT, 381 TokenType.RING, 382 TokenType.LINESTRING, 383 TokenType.MULTILINESTRING, 384 TokenType.POLYGON, 385 TokenType.MULTIPOLYGON, 386 TokenType.HLLSKETCH, 387 TokenType.HSTORE, 388 TokenType.PSEUDO_TYPE, 389 TokenType.SUPER, 390 TokenType.SERIAL, 391 TokenType.SMALLSERIAL, 392 TokenType.BIGSERIAL, 393 TokenType.XML, 394 TokenType.YEAR, 395 TokenType.USERDEFINED, 396 TokenType.MONEY, 397 TokenType.SMALLMONEY, 398 TokenType.ROWVERSION, 399 TokenType.IMAGE, 400 TokenType.VARIANT, 401 TokenType.VECTOR, 402 TokenType.VOID, 403 TokenType.OBJECT, 404 TokenType.OBJECT_IDENTIFIER, 405 TokenType.INET, 406 TokenType.IPADDRESS, 407 TokenType.IPPREFIX, 408 TokenType.IPV4, 409 TokenType.IPV6, 410 TokenType.UNKNOWN, 411 TokenType.NOTHING, 412 TokenType.NULL, 413 TokenType.NAME, 414 TokenType.TDIGEST, 415 TokenType.DYNAMIC, 416 *ENUM_TYPE_TOKENS, 417 *NESTED_TYPE_TOKENS, 418 *AGGREGATE_TYPE_TOKENS, 419 } 420 421 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 422 TokenType.BIGINT: TokenType.UBIGINT, 423 TokenType.INT: TokenType.UINT, 424 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 425 TokenType.SMALLINT: TokenType.USMALLINT, 426 TokenType.TINYINT: TokenType.UTINYINT, 427 
TokenType.DECIMAL: TokenType.UDECIMAL, 428 TokenType.DOUBLE: TokenType.UDOUBLE, 429 } 430 431 SUBQUERY_PREDICATES = { 432 TokenType.ANY: exp.Any, 433 TokenType.ALL: exp.All, 434 TokenType.EXISTS: exp.Exists, 435 TokenType.SOME: exp.Any, 436 } 437 438 RESERVED_TOKENS = { 439 *Tokenizer.SINGLE_TOKENS.values(), 440 TokenType.SELECT, 441 } - {TokenType.IDENTIFIER} 442 443 DB_CREATABLES = { 444 TokenType.DATABASE, 445 TokenType.DICTIONARY, 446 TokenType.FILE_FORMAT, 447 TokenType.MODEL, 448 TokenType.NAMESPACE, 449 TokenType.SCHEMA, 450 TokenType.SEQUENCE, 451 TokenType.SINK, 452 TokenType.SOURCE, 453 TokenType.STAGE, 454 TokenType.STORAGE_INTEGRATION, 455 TokenType.STREAMLIT, 456 TokenType.TABLE, 457 TokenType.TAG, 458 TokenType.VIEW, 459 TokenType.WAREHOUSE, 460 } 461 462 CREATABLES = { 463 TokenType.COLUMN, 464 TokenType.CONSTRAINT, 465 TokenType.FOREIGN_KEY, 466 TokenType.FUNCTION, 467 TokenType.INDEX, 468 TokenType.PROCEDURE, 469 *DB_CREATABLES, 470 } 471 472 ALTERABLES = { 473 TokenType.INDEX, 474 TokenType.TABLE, 475 TokenType.VIEW, 476 } 477 478 # Tokens that can represent identifiers 479 ID_VAR_TOKENS = { 480 TokenType.ALL, 481 TokenType.ATTACH, 482 TokenType.VAR, 483 TokenType.ANTI, 484 TokenType.APPLY, 485 TokenType.ASC, 486 TokenType.ASOF, 487 TokenType.AUTO_INCREMENT, 488 TokenType.BEGIN, 489 TokenType.BPCHAR, 490 TokenType.CACHE, 491 TokenType.CASE, 492 TokenType.COLLATE, 493 TokenType.COMMAND, 494 TokenType.COMMENT, 495 TokenType.COMMIT, 496 TokenType.CONSTRAINT, 497 TokenType.COPY, 498 TokenType.CUBE, 499 TokenType.CURRENT_SCHEMA, 500 TokenType.DEFAULT, 501 TokenType.DELETE, 502 TokenType.DESC, 503 TokenType.DESCRIBE, 504 TokenType.DETACH, 505 TokenType.DICTIONARY, 506 TokenType.DIV, 507 TokenType.END, 508 TokenType.EXECUTE, 509 TokenType.EXPORT, 510 TokenType.ESCAPE, 511 TokenType.FALSE, 512 TokenType.FIRST, 513 TokenType.FILTER, 514 TokenType.FINAL, 515 TokenType.FORMAT, 516 TokenType.FULL, 517 TokenType.GET, 518 TokenType.IDENTIFIER, 519 TokenType.IS, 
520 TokenType.ISNULL, 521 TokenType.INTERVAL, 522 TokenType.KEEP, 523 TokenType.KILL, 524 TokenType.LEFT, 525 TokenType.LIMIT, 526 TokenType.LOAD, 527 TokenType.MERGE, 528 TokenType.NATURAL, 529 TokenType.NEXT, 530 TokenType.OFFSET, 531 TokenType.OPERATOR, 532 TokenType.ORDINALITY, 533 TokenType.OVERLAPS, 534 TokenType.OVERWRITE, 535 TokenType.PARTITION, 536 TokenType.PERCENT, 537 TokenType.PIVOT, 538 TokenType.PRAGMA, 539 TokenType.PUT, 540 TokenType.RANGE, 541 TokenType.RECURSIVE, 542 TokenType.REFERENCES, 543 TokenType.REFRESH, 544 TokenType.RENAME, 545 TokenType.REPLACE, 546 TokenType.RIGHT, 547 TokenType.ROLLUP, 548 TokenType.ROW, 549 TokenType.ROWS, 550 TokenType.SEMI, 551 TokenType.SET, 552 TokenType.SETTINGS, 553 TokenType.SHOW, 554 TokenType.TEMPORARY, 555 TokenType.TOP, 556 TokenType.TRUE, 557 TokenType.TRUNCATE, 558 TokenType.UNIQUE, 559 TokenType.UNNEST, 560 TokenType.UNPIVOT, 561 TokenType.UPDATE, 562 TokenType.USE, 563 TokenType.VOLATILE, 564 TokenType.WINDOW, 565 *CREATABLES, 566 *SUBQUERY_PREDICATES, 567 *TYPE_TOKENS, 568 *NO_PAREN_FUNCTIONS, 569 } 570 ID_VAR_TOKENS.remove(TokenType.UNION) 571 572 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 573 TokenType.ANTI, 574 TokenType.APPLY, 575 TokenType.ASOF, 576 TokenType.FULL, 577 TokenType.LEFT, 578 TokenType.LOCK, 579 TokenType.NATURAL, 580 TokenType.RIGHT, 581 TokenType.SEMI, 582 TokenType.WINDOW, 583 } 584 585 ALIAS_TOKENS = ID_VAR_TOKENS 586 587 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 588 589 ARRAY_CONSTRUCTORS = { 590 "ARRAY": exp.Array, 591 "LIST": exp.List, 592 } 593 594 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 595 596 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 597 598 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 599 600 FUNC_TOKENS = { 601 TokenType.COLLATE, 602 TokenType.COMMAND, 603 TokenType.CURRENT_DATE, 604 TokenType.CURRENT_DATETIME, 605 TokenType.CURRENT_SCHEMA, 606 TokenType.CURRENT_TIMESTAMP, 607 TokenType.CURRENT_TIME, 608 TokenType.CURRENT_USER, 609 
TokenType.FILTER, 610 TokenType.FIRST, 611 TokenType.FORMAT, 612 TokenType.GET, 613 TokenType.GLOB, 614 TokenType.IDENTIFIER, 615 TokenType.INDEX, 616 TokenType.ISNULL, 617 TokenType.ILIKE, 618 TokenType.INSERT, 619 TokenType.LIKE, 620 TokenType.MERGE, 621 TokenType.NEXT, 622 TokenType.OFFSET, 623 TokenType.PRIMARY_KEY, 624 TokenType.RANGE, 625 TokenType.REPLACE, 626 TokenType.RLIKE, 627 TokenType.ROW, 628 TokenType.UNNEST, 629 TokenType.VAR, 630 TokenType.LEFT, 631 TokenType.RIGHT, 632 TokenType.SEQUENCE, 633 TokenType.DATE, 634 TokenType.DATETIME, 635 TokenType.TABLE, 636 TokenType.TIMESTAMP, 637 TokenType.TIMESTAMPTZ, 638 TokenType.TRUNCATE, 639 TokenType.WINDOW, 640 TokenType.XOR, 641 *TYPE_TOKENS, 642 *SUBQUERY_PREDICATES, 643 } 644 645 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 646 TokenType.AND: exp.And, 647 } 648 649 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 650 TokenType.COLON_EQ: exp.PropertyEQ, 651 } 652 653 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 654 TokenType.OR: exp.Or, 655 } 656 657 EQUALITY = { 658 TokenType.EQ: exp.EQ, 659 TokenType.NEQ: exp.NEQ, 660 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 661 } 662 663 COMPARISON = { 664 TokenType.GT: exp.GT, 665 TokenType.GTE: exp.GTE, 666 TokenType.LT: exp.LT, 667 TokenType.LTE: exp.LTE, 668 } 669 670 BITWISE = { 671 TokenType.AMP: exp.BitwiseAnd, 672 TokenType.CARET: exp.BitwiseXor, 673 TokenType.PIPE: exp.BitwiseOr, 674 } 675 676 TERM = { 677 TokenType.DASH: exp.Sub, 678 TokenType.PLUS: exp.Add, 679 TokenType.MOD: exp.Mod, 680 TokenType.COLLATE: exp.Collate, 681 } 682 683 FACTOR = { 684 TokenType.DIV: exp.IntDiv, 685 TokenType.LR_ARROW: exp.Distance, 686 TokenType.SLASH: exp.Div, 687 TokenType.STAR: exp.Mul, 688 } 689 690 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 691 692 TIMES = { 693 TokenType.TIME, 694 TokenType.TIMETZ, 695 } 696 697 TIMESTAMPS = { 698 TokenType.TIMESTAMP, 699 TokenType.TIMESTAMPNTZ, 700 TokenType.TIMESTAMPTZ, 701 
TokenType.TIMESTAMPLTZ, 702 *TIMES, 703 } 704 705 SET_OPERATIONS = { 706 TokenType.UNION, 707 TokenType.INTERSECT, 708 TokenType.EXCEPT, 709 } 710 711 JOIN_METHODS = { 712 TokenType.ASOF, 713 TokenType.NATURAL, 714 TokenType.POSITIONAL, 715 } 716 717 JOIN_SIDES = { 718 TokenType.LEFT, 719 TokenType.RIGHT, 720 TokenType.FULL, 721 } 722 723 JOIN_KINDS = { 724 TokenType.ANTI, 725 TokenType.CROSS, 726 TokenType.INNER, 727 TokenType.OUTER, 728 TokenType.SEMI, 729 TokenType.STRAIGHT_JOIN, 730 } 731 732 JOIN_HINTS: t.Set[str] = set() 733 734 LAMBDAS = { 735 TokenType.ARROW: lambda self, expressions: self.expression( 736 exp.Lambda, 737 this=self._replace_lambda( 738 self._parse_assignment(), 739 expressions, 740 ), 741 expressions=expressions, 742 ), 743 TokenType.FARROW: lambda self, expressions: self.expression( 744 exp.Kwarg, 745 this=exp.var(expressions[0].name), 746 expression=self._parse_assignment(), 747 ), 748 } 749 750 COLUMN_OPERATORS = { 751 TokenType.DOT: None, 752 TokenType.DOTCOLON: lambda self, this, to: self.expression( 753 exp.JSONCast, 754 this=this, 755 to=to, 756 ), 757 TokenType.DCOLON: lambda self, this, to: self.expression( 758 exp.Cast if self.STRICT_CAST else exp.TryCast, 759 this=this, 760 to=to, 761 ), 762 TokenType.ARROW: lambda self, this, path: self.expression( 763 exp.JSONExtract, 764 this=this, 765 expression=self.dialect.to_json_path(path), 766 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 767 ), 768 TokenType.DARROW: lambda self, this, path: self.expression( 769 exp.JSONExtractScalar, 770 this=this, 771 expression=self.dialect.to_json_path(path), 772 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 773 ), 774 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 775 exp.JSONBExtract, 776 this=this, 777 expression=path, 778 ), 779 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 780 exp.JSONBExtractScalar, 781 this=this, 782 expression=path, 783 ), 784 TokenType.PLACEHOLDER: lambda self, this, key: 
self.expression( 785 exp.JSONBContains, 786 this=this, 787 expression=key, 788 ), 789 } 790 791 EXPRESSION_PARSERS = { 792 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 793 exp.Column: lambda self: self._parse_column(), 794 exp.Condition: lambda self: self._parse_assignment(), 795 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 796 exp.Expression: lambda self: self._parse_expression(), 797 exp.From: lambda self: self._parse_from(joins=True), 798 exp.Group: lambda self: self._parse_group(), 799 exp.Having: lambda self: self._parse_having(), 800 exp.Hint: lambda self: self._parse_hint_body(), 801 exp.Identifier: lambda self: self._parse_id_var(), 802 exp.Join: lambda self: self._parse_join(), 803 exp.Lambda: lambda self: self._parse_lambda(), 804 exp.Lateral: lambda self: self._parse_lateral(), 805 exp.Limit: lambda self: self._parse_limit(), 806 exp.Offset: lambda self: self._parse_offset(), 807 exp.Order: lambda self: self._parse_order(), 808 exp.Ordered: lambda self: self._parse_ordered(), 809 exp.Properties: lambda self: self._parse_properties(), 810 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 811 exp.Qualify: lambda self: self._parse_qualify(), 812 exp.Returning: lambda self: self._parse_returning(), 813 exp.Select: lambda self: self._parse_select(), 814 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 815 exp.Table: lambda self: self._parse_table_parts(), 816 exp.TableAlias: lambda self: self._parse_table_alias(), 817 exp.Tuple: lambda self: self._parse_value(values=False), 818 exp.Whens: lambda self: self._parse_when_matched(), 819 exp.Where: lambda self: self._parse_where(), 820 exp.Window: lambda self: self._parse_named_window(), 821 exp.With: lambda self: self._parse_with(), 822 "JOIN_TYPE": lambda self: self._parse_join_parts(), 823 } 824 825 STATEMENT_PARSERS = { 826 TokenType.ALTER: lambda self: self._parse_alter(), 827 TokenType.ANALYZE: 
lambda self: self._parse_analyze(), 828 TokenType.BEGIN: lambda self: self._parse_transaction(), 829 TokenType.CACHE: lambda self: self._parse_cache(), 830 TokenType.COMMENT: lambda self: self._parse_comment(), 831 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 832 TokenType.COPY: lambda self: self._parse_copy(), 833 TokenType.CREATE: lambda self: self._parse_create(), 834 TokenType.DELETE: lambda self: self._parse_delete(), 835 TokenType.DESC: lambda self: self._parse_describe(), 836 TokenType.DESCRIBE: lambda self: self._parse_describe(), 837 TokenType.DROP: lambda self: self._parse_drop(), 838 TokenType.GRANT: lambda self: self._parse_grant(), 839 TokenType.INSERT: lambda self: self._parse_insert(), 840 TokenType.KILL: lambda self: self._parse_kill(), 841 TokenType.LOAD: lambda self: self._parse_load(), 842 TokenType.MERGE: lambda self: self._parse_merge(), 843 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 844 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 845 TokenType.REFRESH: lambda self: self._parse_refresh(), 846 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 847 TokenType.SET: lambda self: self._parse_set(), 848 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 849 TokenType.UNCACHE: lambda self: self._parse_uncache(), 850 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 851 TokenType.UPDATE: lambda self: self._parse_update(), 852 TokenType.USE: lambda self: self._parse_use(), 853 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 854 } 855 856 UNARY_PARSERS = { 857 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 858 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 859 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 860 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 861 
TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 862 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 863 } 864 865 STRING_PARSERS = { 866 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 867 exp.RawString, this=token.text 868 ), 869 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 870 exp.National, this=token.text 871 ), 872 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 873 TokenType.STRING: lambda self, token: self.expression( 874 exp.Literal, this=token.text, is_string=True 875 ), 876 TokenType.UNICODE_STRING: lambda self, token: self.expression( 877 exp.UnicodeString, 878 this=token.text, 879 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 880 ), 881 } 882 883 NUMERIC_PARSERS = { 884 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 885 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 886 TokenType.HEX_STRING: lambda self, token: self.expression( 887 exp.HexString, 888 this=token.text, 889 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 890 ), 891 TokenType.NUMBER: lambda self, token: self.expression( 892 exp.Literal, this=token.text, is_string=False 893 ), 894 } 895 896 PRIMARY_PARSERS = { 897 **STRING_PARSERS, 898 **NUMERIC_PARSERS, 899 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 900 TokenType.NULL: lambda self, _: self.expression(exp.Null), 901 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 902 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 903 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 904 TokenType.STAR: lambda self, _: self._parse_star_ops(), 905 } 906 907 PLACEHOLDER_PARSERS = { 908 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 909 TokenType.PARAMETER: 
lambda self: self._parse_parameter(), 910 TokenType.COLON: lambda self: ( 911 self.expression(exp.Placeholder, this=self._prev.text) 912 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 913 else None 914 ), 915 } 916 917 RANGE_PARSERS = { 918 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 919 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 920 TokenType.GLOB: binary_range_parser(exp.Glob), 921 TokenType.ILIKE: binary_range_parser(exp.ILike), 922 TokenType.IN: lambda self, this: self._parse_in(this), 923 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 924 TokenType.IS: lambda self, this: self._parse_is(this), 925 TokenType.LIKE: binary_range_parser(exp.Like), 926 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 927 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 928 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 929 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 930 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 931 } 932 933 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 934 "ALLOWED_VALUES": lambda self: self.expression( 935 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 936 ), 937 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 938 "AUTO": lambda self: self._parse_auto_property(), 939 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 940 "BACKUP": lambda self: self.expression( 941 exp.BackupProperty, this=self._parse_var(any_token=True) 942 ), 943 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 944 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 945 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 946 "CHECKSUM": lambda self: self._parse_checksum(), 947 "CLUSTER BY": lambda self: self._parse_cluster(), 948 "CLUSTERED": lambda self: self._parse_clustered_by(), 949 "COLLATE": lambda 
self, **kwargs: self._parse_property_assignment( 950 exp.CollateProperty, **kwargs 951 ), 952 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 953 "CONTAINS": lambda self: self._parse_contains_property(), 954 "COPY": lambda self: self._parse_copy_property(), 955 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 956 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 957 "DEFINER": lambda self: self._parse_definer(), 958 "DETERMINISTIC": lambda self: self.expression( 959 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 960 ), 961 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 962 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 963 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 964 "DISTKEY": lambda self: self._parse_distkey(), 965 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 966 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 967 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 968 "ENVIRONMENT": lambda self: self.expression( 969 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 970 ), 971 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 972 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 973 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 974 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 975 "FREESPACE": lambda self: self._parse_freespace(), 976 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 977 "HEAP": lambda self: self.expression(exp.HeapProperty), 978 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 979 "IMMUTABLE": lambda self: self.expression( 980 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 981 ), 982 "INHERITS": lambda self: self.expression( 983 
exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 984 ), 985 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 986 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 987 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 988 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 989 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 990 "LIKE": lambda self: self._parse_create_like(), 991 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 992 "LOCK": lambda self: self._parse_locking(), 993 "LOCKING": lambda self: self._parse_locking(), 994 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 995 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 996 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 997 "MODIFIES": lambda self: self._parse_modifies_property(), 998 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 999 "NO": lambda self: self._parse_no_property(), 1000 "ON": lambda self: self._parse_on_property(), 1001 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1002 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1003 "PARTITION": lambda self: self._parse_partitioned_of(), 1004 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1005 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1006 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1007 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1008 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1009 "READS": lambda self: self._parse_reads_property(), 1010 "REMOTE": lambda self: self._parse_remote_with_connection(), 1011 "RETURNS": lambda self: self._parse_returns(), 1012 "STRICT": lambda self: self.expression(exp.StrictProperty), 1013 "STREAMING": lambda self: 
self.expression(exp.StreamingTableProperty), 1014 "ROW": lambda self: self._parse_row(), 1015 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1016 "SAMPLE": lambda self: self.expression( 1017 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1018 ), 1019 "SECURE": lambda self: self.expression(exp.SecureProperty), 1020 "SECURITY": lambda self: self._parse_security(), 1021 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1022 "SETTINGS": lambda self: self._parse_settings_property(), 1023 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1024 "SORTKEY": lambda self: self._parse_sortkey(), 1025 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1026 "STABLE": lambda self: self.expression( 1027 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1028 ), 1029 "STORED": lambda self: self._parse_stored(), 1030 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1031 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1032 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1033 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1034 "TO": lambda self: self._parse_to_table(), 1035 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1036 "TRANSFORM": lambda self: self.expression( 1037 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1038 ), 1039 "TTL": lambda self: self._parse_ttl(), 1040 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1041 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1042 "VOLATILE": lambda self: self._parse_volatile_property(), 1043 "WITH": lambda self: self._parse_with_property(), 1044 } 1045 1046 CONSTRAINT_PARSERS = { 1047 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1048 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1049 "CASESPECIFIC": 
lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1050 "CHARACTER SET": lambda self: self.expression( 1051 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1052 ), 1053 "CHECK": lambda self: self.expression( 1054 exp.CheckColumnConstraint, 1055 this=self._parse_wrapped(self._parse_assignment), 1056 enforced=self._match_text_seq("ENFORCED"), 1057 ), 1058 "COLLATE": lambda self: self.expression( 1059 exp.CollateColumnConstraint, 1060 this=self._parse_identifier() or self._parse_column(), 1061 ), 1062 "COMMENT": lambda self: self.expression( 1063 exp.CommentColumnConstraint, this=self._parse_string() 1064 ), 1065 "COMPRESS": lambda self: self._parse_compress(), 1066 "CLUSTERED": lambda self: self.expression( 1067 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1068 ), 1069 "NONCLUSTERED": lambda self: self.expression( 1070 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1071 ), 1072 "DEFAULT": lambda self: self.expression( 1073 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1074 ), 1075 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1076 "EPHEMERAL": lambda self: self.expression( 1077 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1078 ), 1079 "EXCLUDE": lambda self: self.expression( 1080 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1081 ), 1082 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1083 "FORMAT": lambda self: self.expression( 1084 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1085 ), 1086 "GENERATED": lambda self: self._parse_generated_as_identity(), 1087 "IDENTITY": lambda self: self._parse_auto_increment(), 1088 "INLINE": lambda self: self._parse_inline(), 1089 "LIKE": lambda self: self._parse_create_like(), 1090 "NOT": lambda self: self._parse_not_constraint(), 1091 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, 
allow_null=True), 1092 "ON": lambda self: ( 1093 self._match(TokenType.UPDATE) 1094 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1095 ) 1096 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1097 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1098 "PERIOD": lambda self: self._parse_period_for_system_time(), 1099 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1100 "REFERENCES": lambda self: self._parse_references(match=False), 1101 "TITLE": lambda self: self.expression( 1102 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1103 ), 1104 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1105 "UNIQUE": lambda self: self._parse_unique(), 1106 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1107 "WATERMARK": lambda self: self.expression( 1108 exp.WatermarkColumnConstraint, 1109 this=self._match(TokenType.FOR) and self._parse_column(), 1110 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1111 ), 1112 "WITH": lambda self: self.expression( 1113 exp.Properties, expressions=self._parse_wrapped_properties() 1114 ), 1115 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1116 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1117 } 1118 1119 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1120 klass = ( 1121 exp.PartitionedByBucket 1122 if self._prev.text.upper() == "BUCKET" 1123 else exp.PartitionByTruncate 1124 ) 1125 1126 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1127 this, expression = seq_get(args, 0), seq_get(args, 1) 1128 1129 if isinstance(this, exp.Literal): 1130 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1131 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, 
<col_name>)` 1132 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1133 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1134 # 1135 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1136 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1137 this, expression = expression, this 1138 1139 return self.expression(klass, this=this, expression=expression) 1140 1141 ALTER_PARSERS = { 1142 "ADD": lambda self: self._parse_alter_table_add(), 1143 "AS": lambda self: self._parse_select(), 1144 "ALTER": lambda self: self._parse_alter_table_alter(), 1145 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1146 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1147 "DROP": lambda self: self._parse_alter_table_drop(), 1148 "RENAME": lambda self: self._parse_alter_table_rename(), 1149 "SET": lambda self: self._parse_alter_table_set(), 1150 "SWAP": lambda self: self.expression( 1151 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1152 ), 1153 } 1154 1155 ALTER_ALTER_PARSERS = { 1156 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1157 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1158 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1159 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1160 } 1161 1162 SCHEMA_UNNAMED_CONSTRAINTS = { 1163 "CHECK", 1164 "EXCLUDE", 1165 "FOREIGN KEY", 1166 "LIKE", 1167 "PERIOD", 1168 "PRIMARY KEY", 1169 "UNIQUE", 1170 "WATERMARK", 1171 "BUCKET", 1172 "TRUNCATE", 1173 } 1174 1175 NO_PAREN_FUNCTION_PARSERS = { 1176 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1177 "CASE": lambda self: self._parse_case(), 1178 "CONNECT_BY_ROOT": lambda self: self.expression( 1179 exp.ConnectByRoot, 
this=self._parse_column() 1180 ), 1181 "IF": lambda self: self._parse_if(), 1182 } 1183 1184 INVALID_FUNC_NAME_TOKENS = { 1185 TokenType.IDENTIFIER, 1186 TokenType.STRING, 1187 } 1188 1189 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1190 1191 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1192 1193 FUNCTION_PARSERS = { 1194 **{ 1195 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1196 }, 1197 **{ 1198 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1199 }, 1200 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1201 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1202 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1203 "DECODE": lambda self: self._parse_decode(), 1204 "EXTRACT": lambda self: self._parse_extract(), 1205 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1206 "GAP_FILL": lambda self: self._parse_gap_fill(), 1207 "JSON_OBJECT": lambda self: self._parse_json_object(), 1208 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1209 "JSON_TABLE": lambda self: self._parse_json_table(), 1210 "MATCH": lambda self: self._parse_match_against(), 1211 "NORMALIZE": lambda self: self._parse_normalize(), 1212 "OPENJSON": lambda self: self._parse_open_json(), 1213 "OVERLAY": lambda self: self._parse_overlay(), 1214 "POSITION": lambda self: self._parse_position(), 1215 "PREDICT": lambda self: self._parse_predict(), 1216 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1217 "STRING_AGG": lambda self: self._parse_string_agg(), 1218 "SUBSTRING": lambda self: self._parse_substring(), 1219 "TRIM": lambda self: self._parse_trim(), 1220 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1221 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1222 "XMLELEMENT": lambda self: self.expression( 1223 exp.XMLElement, 1224 this=self._match_text_seq("NAME") and self._parse_id_var(), 1225 
expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1226 ), 1227 "XMLTABLE": lambda self: self._parse_xml_table(), 1228 } 1229 1230 QUERY_MODIFIER_PARSERS = { 1231 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1232 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1233 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1234 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1235 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1236 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1237 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1238 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1239 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1240 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1241 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1242 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1243 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1244 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1245 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1246 TokenType.CLUSTER_BY: lambda self: ( 1247 "cluster", 1248 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1249 ), 1250 TokenType.DISTRIBUTE_BY: lambda self: ( 1251 "distribute", 1252 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1253 ), 1254 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1255 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1256 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1257 } 1258 1259 SET_PARSERS = { 1260 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1261 "LOCAL": lambda self: 
self._parse_set_item_assignment("LOCAL"), 1262 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1263 "TRANSACTION": lambda self: self._parse_set_transaction(), 1264 } 1265 1266 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1267 1268 TYPE_LITERAL_PARSERS = { 1269 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1270 } 1271 1272 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1273 1274 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1275 1276 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1277 1278 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1279 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1280 "ISOLATION": ( 1281 ("LEVEL", "REPEATABLE", "READ"), 1282 ("LEVEL", "READ", "COMMITTED"), 1283 ("LEVEL", "READ", "UNCOMITTED"), 1284 ("LEVEL", "SERIALIZABLE"), 1285 ), 1286 "READ": ("WRITE", "ONLY"), 1287 } 1288 1289 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1290 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1291 ) 1292 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1293 1294 CREATE_SEQUENCE: OPTIONS_TYPE = { 1295 "SCALE": ("EXTEND", "NOEXTEND"), 1296 "SHARD": ("EXTEND", "NOEXTEND"), 1297 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1298 **dict.fromkeys( 1299 ( 1300 "SESSION", 1301 "GLOBAL", 1302 "KEEP", 1303 "NOKEEP", 1304 "ORDER", 1305 "NOORDER", 1306 "NOCACHE", 1307 "CYCLE", 1308 "NOCYCLE", 1309 "NOMINVALUE", 1310 "NOMAXVALUE", 1311 "NOSCALE", 1312 "NOSHARD", 1313 ), 1314 tuple(), 1315 ), 1316 } 1317 1318 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1319 1320 USABLES: OPTIONS_TYPE = dict.fromkeys( 1321 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1322 ) 1323 1324 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1325 1326 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1327 "TYPE": ("EVOLUTION",), 1328 
**dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1329 } 1330 1331 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1332 1333 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1334 1335 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1336 "NOT": ("ENFORCED",), 1337 "MATCH": ( 1338 "FULL", 1339 "PARTIAL", 1340 "SIMPLE", 1341 ), 1342 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1343 "USING": ( 1344 "BTREE", 1345 "HASH", 1346 ), 1347 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1348 } 1349 1350 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1351 "NO": ("OTHERS",), 1352 "CURRENT": ("ROW",), 1353 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1354 } 1355 1356 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1357 1358 CLONE_KEYWORDS = {"CLONE", "COPY"} 1359 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1360 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1361 1362 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1363 1364 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1365 1366 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1367 1368 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1369 1370 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1371 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1372 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1373 1374 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1375 1376 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1377 1378 ADD_CONSTRAINT_TOKENS = { 1379 TokenType.CONSTRAINT, 1380 TokenType.FOREIGN_KEY, 1381 TokenType.INDEX, 1382 TokenType.KEY, 1383 TokenType.PRIMARY_KEY, 1384 TokenType.UNIQUE, 1385 } 1386 1387 DISTINCT_TOKENS = {TokenType.DISTINCT} 1388 1389 NULL_TOKENS = {TokenType.NULL} 1390 1391 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1392 1393 SELECT_START_TOKENS = 
{TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1394 1395 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1396 1397 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1398 1399 ODBC_DATETIME_LITERALS = { 1400 "d": exp.Date, 1401 "t": exp.Time, 1402 "ts": exp.Timestamp, 1403 } 1404 1405 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1406 1407 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1408 1409 # The style options for the DESCRIBE statement 1410 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1411 1412 # The style options for the ANALYZE statement 1413 ANALYZE_STYLES = { 1414 "BUFFER_USAGE_LIMIT", 1415 "FULL", 1416 "LOCAL", 1417 "NO_WRITE_TO_BINLOG", 1418 "SAMPLE", 1419 "SKIP_LOCKED", 1420 "VERBOSE", 1421 } 1422 1423 ANALYZE_EXPRESSION_PARSERS = { 1424 "ALL": lambda self: self._parse_analyze_columns(), 1425 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1426 "DELETE": lambda self: self._parse_analyze_delete(), 1427 "DROP": lambda self: self._parse_analyze_histogram(), 1428 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1429 "LIST": lambda self: self._parse_analyze_list(), 1430 "PREDICATE": lambda self: self._parse_analyze_columns(), 1431 "UPDATE": lambda self: self._parse_analyze_histogram(), 1432 "VALIDATE": lambda self: self._parse_analyze_validate(), 1433 } 1434 1435 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1436 1437 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1438 1439 OPERATION_MODIFIERS: t.Set[str] = set() 1440 1441 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1442 1443 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1444 1445 STRICT_CAST = True 1446 1447 PREFIXED_PIVOT_COLUMNS = False 1448 IDENTIFY_PIVOT_STRINGS = False 1449 1450 LOG_DEFAULTS_TO_LN = False 1451 1452 # Whether ADD is present for each column added by ALTER TABLE 1453 
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1454 1455 # Whether the table sample clause expects CSV syntax 1456 TABLESAMPLE_CSV = False 1457 1458 # The default method used for table sampling 1459 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1460 1461 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1462 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1463 1464 # Whether the TRIM function expects the characters to trim as its first argument 1465 TRIM_PATTERN_FIRST = False 1466 1467 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1468 STRING_ALIASES = False 1469 1470 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1471 MODIFIERS_ATTACHED_TO_SET_OP = True 1472 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1473 1474 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1475 NO_PAREN_IF_COMMANDS = True 1476 1477 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1478 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1479 1480 # Whether the `:` operator is used to extract a value from a VARIANT column 1481 COLON_IS_VARIANT_EXTRACT = False 1482 1483 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1484 # If this is True and '(' is not found, the keyword will be treated as an identifier 1485 VALUES_FOLLOWED_BY_PAREN = True 1486 1487 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Per-instance state; every name listed here is assigned in __init__ or reset()
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Creates a parser and puts it in its initial (empty) state.

        Args:
            error_level: How errors are reported. Falls back to ErrorLevel.IMMEDIATE when not given.
            error_message_context: Number of characters of SQL kept on each side of the offending
                token when an error message is built.
            max_errors: Passed to `concat_messages` when the recorded errors are raised together.
            dialect: The dialect (or anything `Dialect.get_or_raise` accepts) to parse with.
        """
        # Imported locally; at module level this name is only imported for type checking
        # (presumably to avoid a circular import - confirm before moving it).
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears the SQL text, the recorded errors and the token cursor state."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
1559 """ 1560 return self._parse( 1561 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1562 ) 1563 1564 def parse_into( 1565 self, 1566 expression_types: exp.IntoType, 1567 raw_tokens: t.List[Token], 1568 sql: t.Optional[str] = None, 1569 ) -> t.List[t.Optional[exp.Expression]]: 1570 """ 1571 Parses a list of tokens into a given Expression type. If a collection of Expression 1572 types is given instead, this method will try to parse the token list into each one 1573 of them, stopping at the first for which the parsing succeeds. 1574 1575 Args: 1576 expression_types: The expression type(s) to try and parse the token list into. 1577 raw_tokens: The list of tokens. 1578 sql: The original SQL string, used to produce helpful debug messages. 1579 1580 Returns: 1581 The target Expression. 1582 """ 1583 errors = [] 1584 for expression_type in ensure_list(expression_types): 1585 parser = self.EXPRESSION_PARSERS.get(expression_type) 1586 if not parser: 1587 raise TypeError(f"No parser registered for {expression_type}") 1588 1589 try: 1590 return self._parse(parser, raw_tokens, sql) 1591 except ParseError as e: 1592 e.errors[0]["into_expression"] = expression_type 1593 errors.append(e) 1594 1595 raise ParseError( 1596 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1597 errors=merge_errors(errors), 1598 ) from errors[-1] 1599 1600 def _parse( 1601 self, 1602 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1603 raw_tokens: t.List[Token], 1604 sql: t.Optional[str] = None, 1605 ) -> t.List[t.Optional[exp.Expression]]: 1606 self.reset() 1607 self.sql = sql or "" 1608 1609 total = len(raw_tokens) 1610 chunks: t.List[t.List[Token]] = [[]] 1611 1612 for i, token in enumerate(raw_tokens): 1613 if token.token_type == TokenType.SEMICOLON: 1614 if token.comments: 1615 chunks.append([token]) 1616 1617 if i < total - 1: 1618 chunks.append([]) 1619 else: 1620 chunks[-1].append(token) 1621 1622 expressions = [] 1623 1624 for 
tokens in chunks: 1625 self._index = -1 1626 self._tokens = tokens 1627 self._advance() 1628 1629 expressions.append(parse_method(self)) 1630 1631 if self._index < len(self._tokens): 1632 self.raise_error("Invalid expression / Unexpected token") 1633 1634 self.check_errors() 1635 1636 return expressions 1637 1638 def check_errors(self) -> None: 1639 """Logs or raises any found errors, depending on the chosen error level setting.""" 1640 if self.error_level == ErrorLevel.WARN: 1641 for error in self.errors: 1642 logger.error(str(error)) 1643 elif self.error_level == ErrorLevel.RAISE and self.errors: 1644 raise ParseError( 1645 concat_messages(self.errors, self.max_errors), 1646 errors=merge_errors(self.errors), 1647 ) 1648 1649 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1650 """ 1651 Appends an error in the list of recorded errors or raises it, depending on the chosen 1652 error level setting. 1653 """ 1654 token = token or self._curr or self._prev or Token.string("") 1655 start = token.start 1656 end = token.end + 1 1657 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1658 highlight = self.sql[start:end] 1659 end_context = self.sql[end : end + self.error_message_context] 1660 1661 error = ParseError.new( 1662 f"{message}. Line {token.line}, Col: {token.col}.\n" 1663 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1664 description=message, 1665 line=token.line, 1666 col=token.col, 1667 start_context=start_context, 1668 highlight=highlight, 1669 end_context=end_context, 1670 ) 1671 1672 if self.error_level == ErrorLevel.IMMEDIATE: 1673 raise error 1674 1675 self.errors.append(error) 1676 1677 def expression( 1678 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1679 ) -> E: 1680 """ 1681 Creates a new, validated Expression. 1682 1683 Args: 1684 exp_class: The expression class to instantiate. 1685 comments: An optional list of comments to attach to the expression. 
1686 kwargs: The arguments to set for the expression along with their respective values. 1687 1688 Returns: 1689 The target expression. 1690 """ 1691 instance = exp_class(**kwargs) 1692 instance.add_comments(comments) if comments else self._add_comments(instance) 1693 return self.validate_expression(instance) 1694 1695 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1696 if expression and self._prev_comments: 1697 expression.add_comments(self._prev_comments) 1698 self._prev_comments = None 1699 1700 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1701 """ 1702 Validates an Expression, making sure that all its mandatory arguments are set. 1703 1704 Args: 1705 expression: The expression to validate. 1706 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1707 1708 Returns: 1709 The validated expression. 1710 """ 1711 if self.error_level != ErrorLevel.IGNORE: 1712 for error_message in expression.error_messages(args): 1713 self.raise_error(error_message) 1714 1715 return expression 1716 1717 def _find_sql(self, start: Token, end: Token) -> str: 1718 return self.sql[start.start : end.end + 1] 1719 1720 def _is_connected(self) -> bool: 1721 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1722 1723 def _advance(self, times: int = 1) -> None: 1724 self._index += times 1725 self._curr = seq_get(self._tokens, self._index) 1726 self._next = seq_get(self._tokens, self._index + 1) 1727 1728 if self._index > 0: 1729 self._prev = self._tokens[self._index - 1] 1730 self._prev_comments = self._prev.comments 1731 else: 1732 self._prev = None 1733 self._prev_comments = None 1734 1735 def _retreat(self, index: int) -> None: 1736 if index != self._index: 1737 self._advance(index - self._index) 1738 1739 def _warn_unsupported(self) -> None: 1740 if len(self._tokens) <= 1: 1741 return 1742 1743 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1744 # interested in emitting a warning for the one being currently processed. 1745 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1746 1747 logger.warning( 1748 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1749 ) 1750 1751 def _parse_command(self) -> exp.Command: 1752 self._warn_unsupported() 1753 return self.expression( 1754 exp.Command, 1755 comments=self._prev_comments, 1756 this=self._prev.text.upper(), 1757 expression=self._parse_string(), 1758 ) 1759 1760 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1761 """ 1762 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1763 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1764 solve this by setting & resetting the parser state accordingly 1765 """ 1766 index = self._index 1767 error_level = self.error_level 1768 1769 self.error_level = ErrorLevel.IMMEDIATE 1770 try: 1771 this = parse_method() 1772 except ParseError: 1773 this = None 1774 finally: 1775 if not this or retreat: 1776 self._retreat(index) 1777 self.error_level = error_level 1778 1779 return this 1780 1781 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1782 start = self._prev 1783 exists = self._parse_exists() if allow_exists else None 1784 1785 self._match(TokenType.ON) 1786 1787 materialized = self._match_text_seq("MATERIALIZED") 1788 kind = self._match_set(self.CREATABLES) and self._prev 1789 if not kind: 1790 return self._parse_as_command(start) 1791 1792 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1793 this = self._parse_user_defined_function(kind=kind.token_type) 1794 elif kind.token_type == TokenType.TABLE: 1795 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1796 elif kind.token_type == TokenType.COLUMN: 1797 this = self._parse_column() 1798 else: 1799 this 
= self._parse_id_var() 1800 1801 self._match(TokenType.IS) 1802 1803 return self.expression( 1804 exp.Comment, 1805 this=this, 1806 kind=kind.text, 1807 expression=self._parse_string(), 1808 exists=exists, 1809 materialized=materialized, 1810 ) 1811 1812 def _parse_to_table( 1813 self, 1814 ) -> exp.ToTableProperty: 1815 table = self._parse_table_parts(schema=True) 1816 return self.expression(exp.ToTableProperty, this=table) 1817 1818 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1819 def _parse_ttl(self) -> exp.Expression: 1820 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1821 this = self._parse_bitwise() 1822 1823 if self._match_text_seq("DELETE"): 1824 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1825 if self._match_text_seq("RECOMPRESS"): 1826 return self.expression( 1827 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1828 ) 1829 if self._match_text_seq("TO", "DISK"): 1830 return self.expression( 1831 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1832 ) 1833 if self._match_text_seq("TO", "VOLUME"): 1834 return self.expression( 1835 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1836 ) 1837 1838 return this 1839 1840 expressions = self._parse_csv(_parse_ttl_action) 1841 where = self._parse_where() 1842 group = self._parse_group() 1843 1844 aggregates = None 1845 if group and self._match(TokenType.SET): 1846 aggregates = self._parse_csv(self._parse_set_item) 1847 1848 return self.expression( 1849 exp.MergeTreeTTL, 1850 expressions=expressions, 1851 where=where, 1852 group=group, 1853 aggregates=aggregates, 1854 ) 1855 1856 def _parse_statement(self) -> t.Optional[exp.Expression]: 1857 if self._curr is None: 1858 return None 1859 1860 if self._match_set(self.STATEMENT_PARSERS): 1861 comments = self._prev_comments 1862 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1863 stmt.add_comments(comments, 
prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """
        Parses the remainder of a DROP statement (the DROP keyword is the previous token).

        Args:
            exists: Whether the existence check should be treated as already present; when
                False, an `IF EXISTS` clause is looked for in the token stream.

        Returns:
            A `Drop` node, or a generic `Command` if the dropped object kind isn't a known creatable.
        """
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            # NOTE(review): `self._prev` is the most recently consumed token, so after a
            # matched CONCURRENTLY / IF EXISTS it is no longer the creatable keyword and
            # `is_db_reference` evaluates to False for e.g. DROP SCHEMA IF EXISTS - confirm
            # this is intended.
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        # advance=False: only peek, the wrapped parser consumes the parenthesis itself
        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        # The trailing keywords are consumed while the keyword arguments are evaluated,
        # i.e. in the order CASCADE, CONSTRAINTS, PURGE.
        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """
        Matches an `IF EXISTS` clause, or `IF NOT EXISTS` when `not_` is set.

        Returns:
            A truthy value if the whole clause was matched, a falsy one otherwise.
        """
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched
a statement parser 1923 start = self._prev 1924 1925 replace = ( 1926 start.token_type == TokenType.REPLACE 1927 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1928 or self._match_pair(TokenType.OR, TokenType.ALTER) 1929 ) 1930 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1931 1932 unique = self._match(TokenType.UNIQUE) 1933 1934 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1935 clustered = True 1936 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1937 "COLUMNSTORE" 1938 ): 1939 clustered = False 1940 else: 1941 clustered = None 1942 1943 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1944 self._advance() 1945 1946 properties = None 1947 create_token = self._match_set(self.CREATABLES) and self._prev 1948 1949 if not create_token: 1950 # exp.Properties.Location.POST_CREATE 1951 properties = self._parse_properties() 1952 create_token = self._match_set(self.CREATABLES) and self._prev 1953 1954 if not properties or not create_token: 1955 return self._parse_as_command(start) 1956 1957 concurrently = self._match_text_seq("CONCURRENTLY") 1958 exists = self._parse_exists(not_=True) 1959 this = None 1960 expression: t.Optional[exp.Expression] = None 1961 indexes = None 1962 no_schema_binding = None 1963 begin = None 1964 end = None 1965 clone = None 1966 1967 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1968 nonlocal properties 1969 if properties and temp_props: 1970 properties.expressions.extend(temp_props.expressions) 1971 elif temp_props: 1972 properties = temp_props 1973 1974 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1975 this = self._parse_user_defined_function(kind=create_token.token_type) 1976 1977 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1978 extend_props(self._parse_properties()) 1979 1980 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1981 
extend_props(self._parse_properties()) 1982 1983 if not expression: 1984 if self._match(TokenType.COMMAND): 1985 expression = self._parse_as_command(self._prev) 1986 else: 1987 begin = self._match(TokenType.BEGIN) 1988 return_ = self._match_text_seq("RETURN") 1989 1990 if self._match(TokenType.STRING, advance=False): 1991 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1992 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1993 expression = self._parse_string() 1994 extend_props(self._parse_properties()) 1995 else: 1996 expression = self._parse_user_defined_function_expression() 1997 1998 end = self._match_text_seq("END") 1999 2000 if return_: 2001 expression = self.expression(exp.Return, this=expression) 2002 elif create_token.token_type == TokenType.INDEX: 2003 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2004 if not self._match(TokenType.ON): 2005 index = self._parse_id_var() 2006 anonymous = False 2007 else: 2008 index = None 2009 anonymous = True 2010 2011 this = self._parse_index(index=index, anonymous=anonymous) 2012 elif create_token.token_type in self.DB_CREATABLES: 2013 table_parts = self._parse_table_parts( 2014 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2015 ) 2016 2017 # exp.Properties.Location.POST_NAME 2018 self._match(TokenType.COMMA) 2019 extend_props(self._parse_properties(before=True)) 2020 2021 this = self._parse_schema(this=table_parts) 2022 2023 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2024 extend_props(self._parse_properties()) 2025 2026 has_alias = self._match(TokenType.ALIAS) 2027 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2028 # exp.Properties.Location.POST_ALIAS 2029 extend_props(self._parse_properties()) 2030 2031 if create_token.token_type == TokenType.SEQUENCE: 2032 expression = self._parse_types() 2033 extend_props(self._parse_properties()) 
def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
    """Parse a run of sequence options into one `exp.SequenceProperties` node.

    Options may be separated by commas. Parsing stops at the first token that
    is neither a recognized clause nor an entry of `self.CREATE_SEQUENCE`.

    Returns:
        The populated node, or None when no token was consumed at all.
    """
    start_index = self._index
    sequence = exp.SequenceProperties()
    extra_options = []

    while self._curr:
        self._match(TokenType.COMMA)

        if self._match_text_seq("INCREMENT"):
            # Both BY and "=" are optional after INCREMENT
            self._match_text_seq("BY")
            self._match_text_seq("=")
            sequence.set("increment", self._parse_term())
            continue

        if self._match_text_seq("MINVALUE"):
            sequence.set("minvalue", self._parse_term())
            continue

        if self._match_text_seq("MAXVALUE"):
            sequence.set("maxvalue", self._parse_term())
            continue

        if self._match(TokenType.START_WITH) or self._match_text_seq("START"):
            self._match_text_seq("=")
            sequence.set("start", self._parse_term())
            continue

        if self._match_text_seq("CACHE"):
            # T-SQL allows empty CACHE which is initialized dynamically
            sequence.set("cache", self._parse_number() or True)
            continue

        if self._match_text_seq("OWNED", "BY"):
            # "OWNED BY NONE" is the default
            if self._match_text_seq("NONE"):
                owner = None
            else:
                owner = self._parse_column()
            sequence.set("owned", owner)
            continue

        option = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
        if not option:
            break
        extra_options.append(option)

    sequence.set("options", extra_options or None)

    if self._index == start_index:
        return None
    return sequence
def _parse_property(self) -> t.Optional[exp.Expression]:
    """Parse a single property.

    Tried in order: a keyword registered in `self.PROPERTY_PARSERS`, the same
    preceded by DEFAULT, COMPOUND SORTKEY, SQL SECURITY, a generic
    `key = value` pair, and finally sequence properties.
    """
    if self._match_texts(self.PROPERTY_PARSERS):
        keyword_parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
        return keyword_parser(self)

    # NOTE(review): when DEFAULT matches but no property keyword follows, the
    # DEFAULT token stays consumed (the position is only snapshotted further down).
    if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
        keyword_parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
        return keyword_parser(self, default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("SQL", "SECURITY"):
        is_definer = self._match_text_seq("DEFINER")
        return self.expression(exp.SqlSecurityProperty, definer=is_definer)

    checkpoint = self._index
    name = self._parse_column()

    if not self._match(TokenType.EQ):
        # Not a `key = value` pair: rewind and try sequence properties instead
        self._retreat(checkpoint)
        return self._parse_sequence_properties()

    # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
    if isinstance(name, exp.Column):
        if len(name.parts) > 1:
            name = name.to_dot()
        else:
            name = exp.var(name.name)

    assigned = self._parse_bitwise() or self._parse_var(any_token=True)

    # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
    if isinstance(assigned, exp.Column):
        assigned = exp.var(assigned.name)

    return self.expression(exp.Property, this=name, value=assigned)
def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
    """Collect consecutive properties into a single `exp.Properties` node.

    Args:
        before: When truthy each property is parsed with
            `_parse_property_before`, otherwise with `_parse_property`.

    Returns:
        The `exp.Properties` wrapper, or None when no property was parsed.
    """
    parse_one = self._parse_property_before if before else self._parse_property

    collected = []
    while True:
        parsed = parse_one()
        if not parsed:
            break
        # A property parser may yield either one node or a list of nodes
        collected.extend(ensure_list(parsed))

    if not collected:
        return None

    return self.expression(exp.Properties, expressions=collected)
def _parse_system_versioning_property(
    self, with_: bool = False
) -> exp.WithSystemVersioningProperty:
    """Parse the value of a SYSTEM_VERSIONING property.

    Accepts an optional "=", then OFF, or an optional ON followed by an
    optional parenthesized list of HISTORY_TABLE, DATA_CONSISTENCY_CHECK and
    HISTORY_RETENTION_PERIOD assignments.

    Args:
        with_: Stored on the node under the "with" arg.

    Returns:
        The populated `exp.WithSystemVersioningProperty`.
    """
    self._match(TokenType.EQ)
    prop = self.expression(
        exp.WithSystemVersioningProperty,
        **{  # type: ignore
            "on": True,
            "with": with_,
        },
    )

    if self._match_text_seq("OFF"):
        prop.set("on", False)
        return prop

    self._match(TokenType.ON)
    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            position = self._index

            if self._match_text_seq("HISTORY_TABLE", "="):
                prop.set("this", self._parse_table_parts())
            elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
            elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            if self._index == position:
                # Nothing was consumed in this iteration (unknown option or stray
                # token). Without this guard the loop would spin forever on the
                # same token, so report the problem and stop.
                self.raise_error("Unexpected token in SYSTEM_VERSIONING options")
                break

    return prop
def _parse_distributed_property(self) -> exp.DistributedByProperty:
    """Parse a DISTRIBUTED property: optional BY HASH (...) / BY RANDOM, optional
    BUCKETS clause, and an optional trailing order clause.

    The kind defaults to "HASH" when neither BY HASH nor BY RANDOM is present.
    """
    distribution = "HASH"
    hash_columns: t.Optional[t.List[exp.Expression]] = None

    if self._match_text_seq("BY", "HASH"):
        hash_columns = self._parse_wrapped_csv(self._parse_id_var)
    elif self._match_text_seq("BY", "RANDOM"):
        distribution = "RANDOM"

    # If the BUCKETS keyword is not present, the number of buckets is AUTO
    bucket_count: t.Optional[exp.Expression] = None
    if self._match_text_seq("BUCKETS"):
        if not self._match_text_seq("AUTO"):
            bucket_count = self._parse_number()

    ordering = self._parse_order()

    return self.expression(
        exp.DistributedByProperty,
        expressions=hash_columns,
        kind=distribution,
        buckets=bucket_count,
        order=ordering,
    )
def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
    """Parse a DEFINER value of the form `user@host` (an optional "=" comes first).

    See https://dev.mysql.com/doc/refman/8.0/en/create-view.html

    Returns:
        An `exp.DefinerProperty` whose `this` is the string "user@host", or
        None when either part is missing.
    """
    self._match(TokenType.EQ)

    account = self._parse_id_var()
    self._match(TokenType.PARAMETER)

    # The host is either an identifier or a MOD token, whose text is used as-is
    location = self._parse_id_var()
    if not location and self._match(TokenType.MOD):
        location = self._prev.text

    if account and location:
        return exp.DefinerProperty(this=f"{account}@{location}")

    return None
def _parse_clustered_by(self) -> exp.ClusteredByProperty:
    """Parse `[BY] (cols) [SORTED BY (ordered)] [INTO] <n> [BUCKETS]`.

    The BY, INTO and BUCKETS keywords are matched opportunistically; their
    absence is not reported here.
    """
    self._match_text_seq("BY")

    self._match_l_paren()
    columns = self._parse_csv(self._parse_column)
    self._match_r_paren()

    sort_keys = None
    if self._match_text_seq("SORTED", "BY"):
        self._match_l_paren()
        sort_keys = self._parse_csv(self._parse_ordered)
        self._match_r_paren()

    self._match(TokenType.INTO)
    bucket_count = self._parse_number()
    self._match_text_seq("BUCKETS")

    return self.expression(
        exp.ClusteredByProperty,
        expressions=columns,
        sorted_by=sort_keys,
        buckets=bucket_count,
    )
def _parse_datablocksize(
    self,
    default: t.Optional[bool] = None,
    minimum: t.Optional[bool] = None,
    maximum: t.Optional[bool] = None,
) -> exp.DataBlocksizeProperty:
    """Parse `[=] <number> [BYTES | KBYTES | KILOBYTES]` into a DataBlocksizeProperty.

    Args:
        default: Forwarded unchanged to the node's `default` arg.
        minimum: Forwarded unchanged to the node's `minimum` arg.
        maximum: Forwarded unchanged to the node's `maximum` arg.
    """
    self._match(TokenType.EQ)
    block_size = self._parse_number()

    # The unit keeps the casing of the matched token's text
    has_unit = self._match_texts(("BYTES", "KBYTES", "KILOBYTES"))
    unit_text = self._prev.text if has_unit else None

    return self.expression(
        exp.DataBlocksizeProperty,
        size=block_size,
        units=unit_text,
        default=default,
        minimum=minimum,
        maximum=maximum,
    )
def _parse_partition_by(self) -> t.List[exp.Expression]:
    """Parse the comma-separated expressions following a PARTITION_BY token.

    Returns:
        The parsed expressions, or an empty list when the token is absent.
    """
    if not self._match(TokenType.PARTITION_BY):
        return []

    return self._parse_csv(self._parse_assignment)
# https://www.postgresql.org/docs/current/sql-createtable.html
def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
    """Parse `OF <parent> { DEFAULT | FOR VALUES <bound spec> }`.

    Returns:
        The `exp.PartitionedOfProperty`, or None (after stepping back one
        token) when the next token is not OF.
    """
    if not self._match_text_seq("OF"):
        # Give back the token consumed before this parser was invoked
        self._retreat(self._index - 1)
        return None

    this = self._parse_table(schema=True)

    # Bound up front: other callers in this file keep executing after
    # `raise_error`, so it may return instead of raising (presumably depending
    # on the configured error level). The final `self.expression` call must
    # then not fail with an UnboundLocalError.
    expression: t.Optional[exp.Var | exp.PartitionBoundSpec] = None

    if self._match(TokenType.DEFAULT):
        expression = exp.var("DEFAULT")
    elif self._match_text_seq("FOR", "VALUES"):
        expression = self._parse_partition_bound_spec()
    else:
        self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

    return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
    """Parse `<table> [{INCLUDING | EXCLUDING} <option>]...` into a LikeProperty.

    Returns:
        The `exp.LikeProperty`, or None when INCLUDING / EXCLUDING is not
        followed by an identifier.
    """
    source_table = self._parse_table(schema=True)

    like_options = []
    while True:
        if not self._match_texts(("INCLUDING", "EXCLUDING")):
            break

        mode = self._prev.text.upper()
        option_name = self._parse_id_var()
        if not option_name:
            return None

        # Each option becomes Property(this=<mode>, value=Var(<upper-cased name>))
        option_value = exp.var(option_name.this.upper())
        like_options.append(self.expression(exp.Property, this=mode, value=option_value))

    return self.expression(exp.LikeProperty, this=source_table, expressions=like_options)
def _parse_returns(self) -> exp.ReturnsProperty:
    """Parse the value of a RETURNS property.

    Handles `TABLE <...>`, `TABLE` followed by a schema, the phrase
    `NULL ON NULL INPUT`, and otherwise a plain type.
    """
    returns_table = self._match(TokenType.TABLE)
    null_on_null = None
    returned: t.Optional[exp.Expression] = None

    if returns_table and self._match(TokenType.LT):
        # TABLE<...>: a struct-like column list between angle brackets
        columns = self._parse_csv(self._parse_struct_types)
        returned = self.expression(exp.Schema, this="TABLE", expressions=columns)
        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")
    elif returns_table:
        returned = self._parse_schema(exp.var("TABLE"))
    elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
        null_on_null = True
    else:
        returned = self._parse_types()

    return self.expression(
        exp.ReturnsProperty, this=returned, is_table=returns_table, null=null_on_null
    )
def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
    """Parse a multi-table INSERT: a run of `[WHEN cond THEN] [ELSE] INTO ...`
    branches followed by the source table.

    Args:
        comments: Comments to attach to the resulting node.

    Returns:
        An `exp.MultitableInserts` whose `kind` is the upper-cased text of the
        token consumed just before this method was called.
    """
    kind = self._prev.text.upper()

    def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
        # One branch; yields None as soon as no INTO follows
        condition = None
        if self._match(TokenType.WHEN):
            condition = self._parse_disjunction()
            self._match(TokenType.THEN)

        is_else = self._match(TokenType.ELSE)

        if not self._match(TokenType.INTO):
            return None

        target = self._parse_table(schema=True)
        values = self._parse_derived_table_values()
        insert = self.expression(exp.Insert, this=target, expression=values)

        return self.expression(
            exp.ConditionalInsert,
            this=insert,
            expression=condition,
            else_=is_else,
        )

    branches = []
    while True:
        branch = parse_conditional_insert()
        if branch is None:
            break
        branches.append(branch)

    return self.expression(
        exp.MultitableInserts,
        kind=kind,
        comments=comments,
        expressions=branches,
        source=self._parse_table(),
    )
def _parse_kill(self) -> exp.Kill:
    """Parse `[CONNECTION | QUERY] <primary>` into an `exp.Kill` node.

    The optional kind is stored as a Var built from the matched token's text.
    """
    target_kind = None
    if self._match_texts(("CONNECTION", "QUERY")):
        target_kind = exp.var(self._prev.text)

    target = self._parse_primary()

    return self.expression(exp.Kill, this=target, kind=target_kind)
def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
    """Parse an optional ``[WITH] SERDE_PROPERTIES (...)`` clause.

    Args:
        with_: Whether a leading WITH should be recorded even if it is not matched here.

    Returns:
        The parsed ``exp.SerdeProperties``, or None (with the token position restored)
        when the SERDE_PROPERTIES token does not follow.
    """
    checkpoint = self._index
    with_ = with_ or self._match_text_seq("WITH")

    if self._match(TokenType.SERDE_PROPERTIES):
        properties = self._parse_wrapped_properties()
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": properties,
                "with": with_,
            },
        )

    # Undo the optional WITH that may have been consumed above
    self._retreat(checkpoint)
    return None
def _parse_load(self) -> exp.LoadData | exp.Command:
    """Parse ``LOAD DATA ...`` into ``exp.LoadData``; anything else becomes a command.

    When DATA does not follow, the statement is handed to ``_parse_as_command``
    together with the previously consumed token.
    """
    if not self._match_text_seq("DATA"):
        return self._parse_as_command(self._prev)

    local = self._match_text_seq("LOCAL")
    self._match_text_seq("INPATH")
    inpath = self._parse_string()
    overwrite = self._match(TokenType.OVERWRITE)
    self._match_pair(TokenType.INTO, TokenType.TABLE)

    # The trailing clauses are read in the order they appear in the statement
    target = self._parse_table(schema=True)
    partition = self._parse_partition()
    input_format = self._match_text_seq("INPUTFORMAT") and self._parse_string()
    serde = self._match_text_seq("SERDE") and self._parse_string()

    return self.expression(
        exp.LoadData,
        this=target,
        local=local,
        overwrite=overwrite,
        inpath=inpath,
        partition=partition,
        input_format=input_format,
        serde=serde,
    )
def _parse_update(self) -> exp.Update:
    """Parse the body of an UPDATE statement into ``exp.Update``.

    RETURNING is accepted either right after the SET assignments or after the
    WHERE clause; the first one found wins.
    """
    target = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
    assignments = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
    early_returning = self._parse_returning()

    # Remaining clauses, consumed strictly in this order
    from_ = self._parse_from(joins=True)
    where = self._parse_where()
    returning = early_returning or self._parse_returning()
    order = self._parse_order()
    limit = self._parse_limit()

    return self.expression(
        exp.Update,
        **{  # type: ignore
            "this": target,
            "expressions": assignments,
            "from": from_,
            "where": where,
            "returning": returning,
            "order": order,
            "limit": limit,
        },
    )
def _parse_partition(self) -> t.Optional[exp.Partition]:
    """Parse ``<partition keyword> (<assignment>, ...)`` or return None if absent."""
    if self._match_texts(self.PARTITION_KEYWORDS):
        # Must be read before parsing the list, while _prev is still the keyword
        is_subpartition = self._prev.text.upper() == "SUBPARTITION"
        members = self._parse_wrapped_csv(self._parse_assignment)

        return self.expression(
            exp.Partition,
            subpartition=is_subpartition,
            expressions=members,
        )

    return None
def _parse_projections(self) -> t.List[exp.Expression]:
    """Parse the projection list of a SELECT.

    This is a plain delegation to ``_parse_expressions``; ``_parse_select`` calls it
    to obtain the SELECT's ``expressions``.
    """
    # NOTE(review): presumably kept as its own method so it can be overridden
    # separately from _parse_expressions - confirm against subclasses.
    return self._parse_expressions()
def _parse_select(
    self,
    nested: bool = False,
    table: bool = False,
    parse_subquery_alias: bool = True,
    parse_set_operation: bool = True,
) -> t.Optional[exp.Expression]:
    """Parse a query: a CTE-prefixed statement, a SELECT, or one of several query-like forms.

    Args:
        nested: Allow a parenthesized query at the current position.
        table: Allow a parenthesized table/query; forwarded to ``_parse_wrapped_select``.
        parse_subquery_alias: Whether an alias is parsed for a parenthesized subquery.
        parse_set_operation: Whether trailing set operations are parsed onto the result.

    Returns:
        The parsed expression, or None when nothing query-like starts here.
    """
    cte = self._parse_with()

    if cte:
        this = self._parse_statement()

        if not this:
            # Only reached when raise_error does not actually raise
            self.raise_error("Failed to parse any statement following CTE")
            return cte

        # Attach the WITH clause only to node types that declare a "with" arg
        if "with" in this.arg_types:
            this.set("with", cte)
        else:
            self.raise_error(f"{this.key} does not support CTE")
            this = cte

        return this

    # duckdb supports a leading FROM, i.e. "FROM x [SELECT ...]"
    from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

    if self._match(TokenType.SELECT):
        comments = self._prev_comments

        hint = self._parse_hint()

        # ALL / DISTINCT are only treated as keywords when the token after the
        # current one is not a dot
        if self._next and not self._next.token_type == TokenType.DOT:
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)
        else:
            all_, distinct = None, None

        # "AS STRUCT" / "AS VALUE" directly after SELECT
        kind = (
            self._match(TokenType.ALIAS)
            and self._match_texts(("STRUCT", "VALUE"))
            and self._prev.text.upper()
        )

        if distinct:
            distinct = self.expression(
                exp.Distinct,
                on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
            )

        if all_ and distinct:
            self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

        operation_modifiers = []
        while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
            operation_modifiers.append(exp.var(self._prev.text.upper()))

        # top=True: the limit is parsed before the projections
        limit = self._parse_limit(top=True)
        projections = self._parse_projections()

        this = self.expression(
            exp.Select,
            kind=kind,
            hint=hint,
            distinct=distinct,
            expressions=projections,
            limit=limit,
            operation_modifiers=operation_modifiers or None,
        )
        # Overwrite with the comments captured right after the SELECT token
        this.comments = comments

        into = self._parse_into()
        if into:
            this.set("into", into)

        # A leading FROM (parsed above) takes precedence over a trailing one
        if not from_:
            from_ = self._parse_from()

        if from_:
            this.set("from", from_)

        this = self._parse_query_modifiers(this)
    elif (table or nested) and self._match(TokenType.L_PAREN):
        this = self._parse_wrapped_select(table=table)

        # We return early here so that the UNION isn't attached to the subquery by the
        # following call to _parse_set_operations, but instead becomes the parent node
        self._match_r_paren()
        return self._parse_subquery(this, parse_alias=parse_subquery_alias)
    elif self._match(TokenType.VALUES, advance=False):
        this = self._parse_derived_table_values()
    elif from_:
        # A bare "FROM x" is expanded to "SELECT * FROM x"
        this = exp.select("*").from_(from_.this, copy=False)
    elif self._match(TokenType.SUMMARIZE):
        # NOTE: the `table` parameter is reused here as the "TABLE keyword seen" flag
        table = self._match(TokenType.TABLE)
        this = self._parse_select() or self._parse_string() or self._parse_table()
        return self.expression(exp.Summarize, this=this, table=table)
    elif self._match(TokenType.DESCRIBE):
        this = self._parse_describe()
    elif self._match_text_seq("STREAM"):
        this = self._parse_function()
        if this:
            this = self.expression(exp.Stream, this=this)
        else:
            # No function followed: step back one token so STREAM is un-consumed
            self._retreat(self._index - 1)
    else:
        this = None

    return self._parse_set_operations(this) if parse_set_operation else this
def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
    """Parse a WITH clause and its list of CTEs.

    Args:
        skip_with_token: Set when the WITH keyword has already been consumed.

    Returns:
        An ``exp.With`` node, or None when no WITH keyword is present.
    """
    if not (skip_with_token or self._match(TokenType.WITH)):
        return None

    with_comments = self._prev_comments
    is_recursive = self._match(TokenType.RECURSIVE)

    ctes = []
    pending_comments = None

    while True:
        parsed = self._parse_cte()
        if isinstance(parsed, exp.CTE):
            ctes.append(parsed)
            # Comments found on the preceding separator belong to this CTE
            if pending_comments:
                parsed.add_comments(pending_comments)

        # CTEs are separated by a comma or by a repeated WITH
        if not (self._match(TokenType.COMMA) or self._match(TokenType.WITH)):
            break

        # Also accept a WITH that directly follows the separator
        self._match(TokenType.WITH)
        pending_comments = self._prev_comments

    search = self._parse_recursive_with_search()

    return self.expression(
        exp.With,
        comments=with_comments,
        expressions=ctes,
        recursive=is_recursive,
        search=search,
    )
def _parse_table_alias(
    self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
) -> t.Optional[exp.TableAlias]:
    """Parse an optional table alias of the form ``[AS] name [(col, ...)]``.

    Args:
        alias_tokens: Token types accepted as the alias name; defaults to
            ``self.TABLE_ALIAS_TOKENS``.

    Returns:
        An ``exp.TableAlias``, or None when neither a name nor a column list is found.
    """
    # LIMIT and OFFSET can be identifiers or clause keywords depending on the dialect.
    # If the upcoming tokens parse as the clause, there is no alias at this position.
    if self._can_parse_limit_or_offset():
        return None

    saw_alias_keyword = self._match(TokenType.ALIAS)
    permitted = alias_tokens or self.TABLE_ALIAS_TOKENS

    name = self._parse_id_var(any_token=saw_alias_keyword, tokens=permitted)
    if not name:
        name = self._parse_string_as_identifier()

    checkpoint = self._index
    column_names = None
    if self._match(TokenType.L_PAREN):
        column_names = self._parse_csv(self._parse_function_parameter)
        if column_names:
            self._match_r_paren()
        else:
            # Not a column list after all: give the parenthesis back
            self._retreat(checkpoint)

    if not (name or column_names):
        return None

    result = self.expression(exp.TableAlias, this=name, columns=column_names)

    # Comments are moved up from the Identifier onto the TableAlias
    if isinstance(name, exp.Identifier):
        result.add_comments(name.pop_comments())

    return result
def _parse_query_modifiers(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Attach trailing joins, laterals and query modifiers to ``this``.

    Only instances of ``self.MODIFIABLES`` are modified; anything else is passed
    through (apart from the implicit-unnest rewrite at the end).

    Args:
        this: The expression parsed so far, possibly None.

    Returns:
        The same expression with modifiers set, or the result of
        ``_implicit_unnests_to_explicit`` when that rewrite applies.
    """
    if isinstance(this, self.MODIFIABLES):
        for join in self._parse_joins():
            this.append("joins", join)
        # Keep calling _parse_lateral until it returns None
        for lateral in iter(self._parse_lateral, None):
            this.append("laterals", lateral)

        while True:
            # Peek only (advance=False): the selected parser consumes the tokens itself
            if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                key, expression = parser(self)

                if expression:
                    this.set(key, expression)
                    if key == "limit":
                        # An offset parsed as part of the limit is split out into
                        # its own Offset node on the query
                        offset = expression.args.pop("offset", None)

                        if offset:
                            offset = exp.Offset(expression=offset)
                            this.set("offset", offset)

                            # The limit's `expressions` are moved over to the offset
                            limit_by_expressions = expression.expressions
                            expression.set("expressions", None)
                            offset.set("expressions", limit_by_expressions)
                # Loop again whenever the current token was a modifier token
                continue
            break

    if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
        this = self._implicit_unnests_to_explicit(this)

    return this
def _parse_hint_body(self) -> t.Optional[exp.Hint]:
    """Parse the contents of a hint, falling back to raw text when that fails.

    The hint is first read as comma-separated function calls / variables. If a
    ``ParseError`` is raised, or tokens remain afterwards, the position is reset
    and ``_parse_hint_fallback_to_string`` is used instead.
    """
    checkpoint = self._index

    def _hint_item() -> t.Optional[exp.Expression]:
        return self._parse_hint_function_call() or self._parse_var(upper=True)

    parsed = []
    structured = True
    try:
        # Read one comma-separated batch after another until an empty one comes back
        while True:
            batch = self._parse_csv(_hint_item)
            if batch == []:
                break
            parsed.extend(batch)
    except ParseError:
        structured = False

    if structured and not self._curr:
        return self.expression(exp.Hint, expressions=parsed)

    self._retreat(checkpoint)
    return self._parse_hint_fallback_to_string()
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a ``MATCH_RECOGNIZE (...)`` clause and its optional trailing alias.

    Returns:
        An ``exp.MatchRecognize`` node, or None when the MATCH_RECOGNIZE token is absent.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = (
        self._parse_csv(self._parse_match_recognize_measure)
        if self._match_text_seq("MEASURES")
        else None
    )

    # The rows-per-match option is stored as a single Var holding its keyword text
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        text = "ALL ROWS PER MATCH"
        if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
            text += " SHOW EMPTY MATCHES"
        elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
            text += " OMIT EMPTY MATCHES"
        elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
            text += " WITH UNMATCHED ROWS"
        rows = exp.var(text)
    else:
        rows = None

    # The skip option is likewise stored as a Var of its keyword text
    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            # The token following FIRST is appended verbatim
            text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            # The token following LAST is appended verbatim
            text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(text)
    else:
        after = None

    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern is not parsed: tokens are skipped until the parenthesis opened
        # above is balanced, tracking nesting depth in `paren`
        paren = 1
        start = self._curr

        while self._curr and paren > 0:
            if self._curr.token_type == TokenType.L_PAREN:
                paren += 1
            if self._curr.token_type == TokenType.R_PAREN:
                paren -= 1

            # `end` trails the current token by one, so once the closing parenthesis
            # has been consumed it is the last token before that parenthesis
            end = self._prev
            self._advance()

        if paren > 0:
            self.raise_error("Expecting )", self._curr)

        # NOTE(review): _find_sql presumably returns the SQL text spanning
        # start..end - confirm against its definition
        pattern = exp.var(self._find_sql(start, end))
    else:
        pattern = None

    define = (
        self._parse_csv(self._parse_name_as_expression)
        if self._match_text_seq("DEFINE")
        else None
    )

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize,
        partition_by=partition,
        order=order,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=self._parse_table_alias(),
    )
def _parse_using_identifiers(self) -> t.List[exp.Expression]:
    """Parse the optionally parenthesized, comma-separated names of a USING clause."""

    def _column_or_identifier() -> t.Optional[exp.Expression]:
        # A parsed Column is reduced to its `this`; anything else is returned as-is
        parsed = self._parse_column()
        return parsed.this if isinstance(parsed, exp.Column) else parsed

    return self._parse_wrapped_csv(_column_or_identifier, optional=True)
def _parse_opclass(self) -> t.Optional[exp.Expression]:
    """Parse an expression that may be followed by an operator class name.

    Returns:
        The bare expression when the next token is one of
        ``OPCLASS_FOLLOW_KEYWORDS`` or ``OPTYPE_FOLLOW_TOKENS``; otherwise an
        ``exp.Opclass`` wrapping the expression and the table parts that follow it.
    """
    operand = self._parse_assignment()

    # Both checks only peek at the current token (advance=False)
    no_opclass = self._match_texts(
        self.OPCLASS_FOLLOW_KEYWORDS, advance=False
    ) or self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False)
    if no_opclass:
        return operand

    opclass_name = self._parse_table_parts()
    return self.expression(exp.Opclass, this=operand, expression=opclass_name)
def _parse_index(
    self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
) -> t.Optional[exp.Index]:
    """Parse an index definition.

    Args:
        index: An already parsed index name, if any.
        anonymous: Whether the index has no name.

    Returns:
        An ``exp.Index`` node, or None when no name was supplied, ``anonymous`` is
        false and the INDEX token is missing.
    """
    if not (index or anonymous):
        # Form: [UNIQUE] [PRIMARY] [AMP] INDEX <name>
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")

        if not self._match(TokenType.INDEX):
            return None

        index = self._parse_id_var()
        table = None
    else:
        # Form: [ON] [TABLE] <table>, with the name (if any) supplied by the caller
        unique = primary = amp = None

        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive
        table = self._parse_table_parts(schema=True)

    params = self._parse_index_params()

    return self.expression(
        exp.Index,
        this=index,
        table=table,
        unique=unique,
        primary=primary,
        amp=amp,
        params=params,
    )
def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
    """Parse one dot-separated component of a table reference.

    Alternatives are tried in order and the first truthy result is returned: a
    function (only when ``schema`` is false), an identifier, a string read as an
    identifier, and finally a placeholder.
    """
    if not schema:
        func = self._parse_function(optional_parens=False)
        if func:
            return func

    identifier = self._parse_id_var(any_token=False)
    if identifier:
        return identifier

    string_identifier = self._parse_string_as_identifier()
    if string_identifier:
        return string_identifier

    return self._parse_placeholder()
exp.Expression) else None 3892 3893 if is_db_reference: 3894 catalog = db 3895 db = table 3896 table = None 3897 3898 if not table and not is_db_reference: 3899 self.raise_error(f"Expected table name but got {self._curr}") 3900 if not db and is_db_reference: 3901 self.raise_error(f"Expected database name but got {self._curr}") 3902 3903 table = self.expression( 3904 exp.Table, 3905 comments=comments, 3906 this=table, 3907 db=db, 3908 catalog=catalog, 3909 ) 3910 3911 changes = self._parse_changes() 3912 if changes: 3913 table.set("changes", changes) 3914 3915 at_before = self._parse_historical_data() 3916 if at_before: 3917 table.set("when", at_before) 3918 3919 pivots = self._parse_pivots() 3920 if pivots: 3921 table.set("pivots", pivots) 3922 3923 return table 3924 3925 def _parse_table( 3926 self, 3927 schema: bool = False, 3928 joins: bool = False, 3929 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3930 parse_bracket: bool = False, 3931 is_db_reference: bool = False, 3932 parse_partition: bool = False, 3933 ) -> t.Optional[exp.Expression]: 3934 lateral = self._parse_lateral() 3935 if lateral: 3936 return lateral 3937 3938 unnest = self._parse_unnest() 3939 if unnest: 3940 return unnest 3941 3942 values = self._parse_derived_table_values() 3943 if values: 3944 return values 3945 3946 subquery = self._parse_select(table=True) 3947 if subquery: 3948 if not subquery.args.get("pivots"): 3949 subquery.set("pivots", self._parse_pivots()) 3950 return subquery 3951 3952 bracket = parse_bracket and self._parse_bracket(None) 3953 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3954 3955 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3956 self._parse_table 3957 ) 3958 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3959 3960 only = self._match(TokenType.ONLY) 3961 3962 this = t.cast( 3963 exp.Expression, 3964 bracket 3965 or rows_from 3966 or self._parse_bracket( 3967 
self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3968 ), 3969 ) 3970 3971 if only: 3972 this.set("only", only) 3973 3974 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3975 self._match_text_seq("*") 3976 3977 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3978 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3979 this.set("partition", self._parse_partition()) 3980 3981 if schema: 3982 return self._parse_schema(this=this) 3983 3984 version = self._parse_version() 3985 3986 if version: 3987 this.set("version", version) 3988 3989 if self.dialect.ALIAS_POST_TABLESAMPLE: 3990 this.set("sample", self._parse_table_sample()) 3991 3992 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3993 if alias: 3994 this.set("alias", alias) 3995 3996 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3997 return self.expression( 3998 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3999 ) 4000 4001 this.set("hints", self._parse_table_hints()) 4002 4003 if not this.args.get("pivots"): 4004 this.set("pivots", self._parse_pivots()) 4005 4006 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4007 this.set("sample", self._parse_table_sample()) 4008 4009 if joins: 4010 for join in self._parse_joins(): 4011 this.append("joins", join) 4012 4013 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4014 this.set("ordinality", True) 4015 this.set("alias", self._parse_table_alias()) 4016 4017 return this 4018 4019 def _parse_version(self) -> t.Optional[exp.Version]: 4020 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4021 this = "TIMESTAMP" 4022 elif self._match(TokenType.VERSION_SNAPSHOT): 4023 this = "VERSION" 4024 else: 4025 return None 4026 4027 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4028 kind = self._prev.text.upper() 4029 start = self._parse_bitwise() 4030 self._match_texts(("TO", "AND")) 
4031 end = self._parse_bitwise() 4032 expression: t.Optional[exp.Expression] = self.expression( 4033 exp.Tuple, expressions=[start, end] 4034 ) 4035 elif self._match_text_seq("CONTAINED", "IN"): 4036 kind = "CONTAINED IN" 4037 expression = self.expression( 4038 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4039 ) 4040 elif self._match(TokenType.ALL): 4041 kind = "ALL" 4042 expression = None 4043 else: 4044 self._match_text_seq("AS", "OF") 4045 kind = "AS OF" 4046 expression = self._parse_type() 4047 4048 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4049 4050 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4051 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4052 index = self._index 4053 historical_data = None 4054 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4055 this = self._prev.text.upper() 4056 kind = ( 4057 self._match(TokenType.L_PAREN) 4058 and self._match_texts(self.HISTORICAL_DATA_KIND) 4059 and self._prev.text.upper() 4060 ) 4061 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4062 4063 if expression: 4064 self._match_r_paren() 4065 historical_data = self.expression( 4066 exp.HistoricalData, this=this, kind=kind, expression=expression 4067 ) 4068 else: 4069 self._retreat(index) 4070 4071 return historical_data 4072 4073 def _parse_changes(self) -> t.Optional[exp.Changes]: 4074 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4075 return None 4076 4077 information = self._parse_var(any_token=True) 4078 self._match_r_paren() 4079 4080 return self.expression( 4081 exp.Changes, 4082 information=information, 4083 at_before=self._parse_historical_data(), 4084 end=self._parse_historical_data(), 4085 ) 4086 4087 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4088 if not self._match(TokenType.UNNEST): 4089 return None 4090 4091 expressions = self._parse_wrapped_csv(self._parse_equality) 4092 
offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4093 4094 alias = self._parse_table_alias() if with_alias else None 4095 4096 if alias: 4097 if self.dialect.UNNEST_COLUMN_ONLY: 4098 if alias.args.get("columns"): 4099 self.raise_error("Unexpected extra column alias in unnest.") 4100 4101 alias.set("columns", [alias.this]) 4102 alias.set("this", None) 4103 4104 columns = alias.args.get("columns") or [] 4105 if offset and len(expressions) < len(columns): 4106 offset = columns.pop() 4107 4108 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4109 self._match(TokenType.ALIAS) 4110 offset = self._parse_id_var( 4111 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4112 ) or exp.to_identifier("offset") 4113 4114 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4115 4116 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4117 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4118 if not is_derived and not ( 4119 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4120 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4121 ): 4122 return None 4123 4124 expressions = self._parse_csv(self._parse_value) 4125 alias = self._parse_table_alias() 4126 4127 if is_derived: 4128 self._match_r_paren() 4129 4130 return self.expression( 4131 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4132 ) 4133 4134 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4135 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4136 as_modifier and self._match_text_seq("USING", "SAMPLE") 4137 ): 4138 return None 4139 4140 bucket_numerator = None 4141 bucket_denominator = None 4142 bucket_field = None 4143 percent = None 4144 size = None 4145 seed = None 4146 4147 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4148 matched_l_paren = self._match(TokenType.L_PAREN) 4149 4150 if 
self.TABLESAMPLE_CSV: 4151 num = None 4152 expressions = self._parse_csv(self._parse_primary) 4153 else: 4154 expressions = None 4155 num = ( 4156 self._parse_factor() 4157 if self._match(TokenType.NUMBER, advance=False) 4158 else self._parse_primary() or self._parse_placeholder() 4159 ) 4160 4161 if self._match_text_seq("BUCKET"): 4162 bucket_numerator = self._parse_number() 4163 self._match_text_seq("OUT", "OF") 4164 bucket_denominator = bucket_denominator = self._parse_number() 4165 self._match(TokenType.ON) 4166 bucket_field = self._parse_field() 4167 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4168 percent = num 4169 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4170 size = num 4171 else: 4172 percent = num 4173 4174 if matched_l_paren: 4175 self._match_r_paren() 4176 4177 if self._match(TokenType.L_PAREN): 4178 method = self._parse_var(upper=True) 4179 seed = self._match(TokenType.COMMA) and self._parse_number() 4180 self._match_r_paren() 4181 elif self._match_texts(("SEED", "REPEATABLE")): 4182 seed = self._parse_wrapped(self._parse_number) 4183 4184 if not method and self.DEFAULT_SAMPLING_METHOD: 4185 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4186 4187 return self.expression( 4188 exp.TableSample, 4189 expressions=expressions, 4190 method=method, 4191 bucket_numerator=bucket_numerator, 4192 bucket_denominator=bucket_denominator, 4193 bucket_field=bucket_field, 4194 percent=percent, 4195 size=size, 4196 seed=seed, 4197 ) 4198 4199 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4200 return list(iter(self._parse_pivot, None)) or None 4201 4202 def _parse_joins(self) -> t.Iterator[exp.Join]: 4203 return iter(self._parse_join, None) 4204 4205 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4206 if not self._match(TokenType.INTO): 4207 return None 4208 4209 return self.expression( 4210 exp.UnpivotColumns, 4211 this=self._match_text_seq("NAME") and self._parse_column(), 4212 
expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4213 ) 4214 4215 # https://duckdb.org/docs/sql/statements/pivot 4216 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4217 def _parse_on() -> t.Optional[exp.Expression]: 4218 this = self._parse_bitwise() 4219 4220 if self._match(TokenType.IN): 4221 # PIVOT ... ON col IN (row_val1, row_val2) 4222 return self._parse_in(this) 4223 if self._match(TokenType.ALIAS, advance=False): 4224 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4225 return self._parse_alias(this) 4226 4227 return this 4228 4229 this = self._parse_table() 4230 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4231 into = self._parse_unpivot_columns() 4232 using = self._match(TokenType.USING) and self._parse_csv( 4233 lambda: self._parse_alias(self._parse_function()) 4234 ) 4235 group = self._parse_group() 4236 4237 return self.expression( 4238 exp.Pivot, 4239 this=this, 4240 expressions=expressions, 4241 using=using, 4242 group=group, 4243 unpivot=is_unpivot, 4244 into=into, 4245 ) 4246 4247 def _parse_pivot_in(self) -> exp.In: 4248 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4249 this = self._parse_select_or_expression() 4250 4251 self._match(TokenType.ALIAS) 4252 alias = self._parse_bitwise() 4253 if alias: 4254 if isinstance(alias, exp.Column) and not alias.db: 4255 alias = alias.this 4256 return self.expression(exp.PivotAlias, this=this, alias=alias) 4257 4258 return this 4259 4260 value = self._parse_column() 4261 4262 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4263 self.raise_error("Expecting IN (") 4264 4265 if self._match(TokenType.ANY): 4266 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4267 else: 4268 exprs = self._parse_csv(_parse_aliased_expression) 4269 4270 self._match_r_paren() 4271 return self.expression(exp.In, this=value, expressions=exprs) 4272 4273 def _parse_pivot(self) -> 
t.Optional[exp.Pivot]: 4274 index = self._index 4275 include_nulls = None 4276 4277 if self._match(TokenType.PIVOT): 4278 unpivot = False 4279 elif self._match(TokenType.UNPIVOT): 4280 unpivot = True 4281 4282 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4283 if self._match_text_seq("INCLUDE", "NULLS"): 4284 include_nulls = True 4285 elif self._match_text_seq("EXCLUDE", "NULLS"): 4286 include_nulls = False 4287 else: 4288 return None 4289 4290 expressions = [] 4291 4292 if not self._match(TokenType.L_PAREN): 4293 self._retreat(index) 4294 return None 4295 4296 if unpivot: 4297 expressions = self._parse_csv(self._parse_column) 4298 else: 4299 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4300 4301 if not expressions: 4302 self.raise_error("Failed to parse PIVOT's aggregation list") 4303 4304 if not self._match(TokenType.FOR): 4305 self.raise_error("Expecting FOR") 4306 4307 fields = [] 4308 while True: 4309 field = self._try_parse(self._parse_pivot_in) 4310 if not field: 4311 break 4312 fields.append(field) 4313 4314 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4315 self._parse_bitwise 4316 ) 4317 4318 group = self._parse_group() 4319 4320 self._match_r_paren() 4321 4322 pivot = self.expression( 4323 exp.Pivot, 4324 expressions=expressions, 4325 fields=fields, 4326 unpivot=unpivot, 4327 include_nulls=include_nulls, 4328 default_on_null=default_on_null, 4329 group=group, 4330 ) 4331 4332 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4333 pivot.set("alias", self._parse_table_alias()) 4334 4335 if not unpivot: 4336 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4337 4338 columns: t.List[exp.Expression] = [] 4339 all_fields = [] 4340 for pivot_field in pivot.fields: 4341 pivot_field_expressions = pivot_field.expressions 4342 4343 # The `PivotAny` expression corresponds to `ANY 
ORDER BY <column>`; we can't infer in this case. 4344 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4345 continue 4346 4347 all_fields.append( 4348 [ 4349 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4350 for fld in pivot_field_expressions 4351 ] 4352 ) 4353 4354 if all_fields: 4355 if names: 4356 all_fields.append(names) 4357 4358 # Generate all possible combinations of the pivot columns 4359 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4360 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4361 for fld_parts_tuple in itertools.product(*all_fields): 4362 fld_parts = list(fld_parts_tuple) 4363 4364 if names and self.PREFIXED_PIVOT_COLUMNS: 4365 # Move the "name" to the front of the list 4366 fld_parts.insert(0, fld_parts.pop(-1)) 4367 4368 columns.append(exp.to_identifier("_".join(fld_parts))) 4369 4370 pivot.set("columns", columns) 4371 4372 return pivot 4373 4374 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4375 return [agg.alias for agg in aggregations if agg.alias] 4376 4377 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4378 if not skip_where_token and not self._match(TokenType.PREWHERE): 4379 return None 4380 4381 return self.expression( 4382 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4383 ) 4384 4385 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4386 if not skip_where_token and not self._match(TokenType.WHERE): 4387 return None 4388 4389 return self.expression( 4390 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4391 ) 4392 4393 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4394 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4395 return None 4396 4397 elements: t.Dict[str, t.Any] = defaultdict(list) 4398 4399 if 
self._match(TokenType.ALL): 4400 elements["all"] = True 4401 elif self._match(TokenType.DISTINCT): 4402 elements["all"] = False 4403 4404 while True: 4405 index = self._index 4406 4407 elements["expressions"].extend( 4408 self._parse_csv( 4409 lambda: None 4410 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4411 else self._parse_assignment() 4412 ) 4413 ) 4414 4415 before_with_index = self._index 4416 with_prefix = self._match(TokenType.WITH) 4417 4418 if self._match(TokenType.ROLLUP): 4419 elements["rollup"].append( 4420 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4421 ) 4422 elif self._match(TokenType.CUBE): 4423 elements["cube"].append( 4424 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4425 ) 4426 elif self._match(TokenType.GROUPING_SETS): 4427 elements["grouping_sets"].append( 4428 self.expression( 4429 exp.GroupingSets, 4430 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4431 ) 4432 ) 4433 elif self._match_text_seq("TOTALS"): 4434 elements["totals"] = True # type: ignore 4435 4436 if before_with_index <= self._index <= before_with_index + 1: 4437 self._retreat(before_with_index) 4438 break 4439 4440 if index == self._index: 4441 break 4442 4443 return self.expression(exp.Group, **elements) # type: ignore 4444 4445 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4446 return self.expression( 4447 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4448 ) 4449 4450 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4451 if self._match(TokenType.L_PAREN): 4452 grouping_set = self._parse_csv(self._parse_column) 4453 self._match_r_paren() 4454 return self.expression(exp.Tuple, expressions=grouping_set) 4455 4456 return self._parse_column() 4457 4458 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4459 if not skip_having_token and not self._match(TokenType.HAVING): 4460 return None 
4461 return self.expression(exp.Having, this=self._parse_assignment()) 4462 4463 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4464 if not self._match(TokenType.QUALIFY): 4465 return None 4466 return self.expression(exp.Qualify, this=self._parse_assignment()) 4467 4468 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4469 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4470 exp.Prior, this=self._parse_bitwise() 4471 ) 4472 connect = self._parse_assignment() 4473 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4474 return connect 4475 4476 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4477 if skip_start_token: 4478 start = None 4479 elif self._match(TokenType.START_WITH): 4480 start = self._parse_assignment() 4481 else: 4482 return None 4483 4484 self._match(TokenType.CONNECT_BY) 4485 nocycle = self._match_text_seq("NOCYCLE") 4486 connect = self._parse_connect_with_prior() 4487 4488 if not start and self._match(TokenType.START_WITH): 4489 start = self._parse_assignment() 4490 4491 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4492 4493 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4494 this = self._parse_id_var(any_token=True) 4495 if self._match(TokenType.ALIAS): 4496 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4497 return this 4498 4499 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4500 if self._match_text_seq("INTERPOLATE"): 4501 return self._parse_wrapped_csv(self._parse_name_as_expression) 4502 return None 4503 4504 def _parse_order( 4505 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4506 ) -> t.Optional[exp.Expression]: 4507 siblings = None 4508 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4509 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4510 return this 4511 4512 siblings = True 4513 4514 return 
self.expression( 4515 exp.Order, 4516 this=this, 4517 expressions=self._parse_csv(self._parse_ordered), 4518 siblings=siblings, 4519 ) 4520 4521 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4522 if not self._match(token): 4523 return None 4524 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4525 4526 def _parse_ordered( 4527 self, parse_method: t.Optional[t.Callable] = None 4528 ) -> t.Optional[exp.Ordered]: 4529 this = parse_method() if parse_method else self._parse_assignment() 4530 if not this: 4531 return None 4532 4533 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4534 this = exp.var("ALL") 4535 4536 asc = self._match(TokenType.ASC) 4537 desc = self._match(TokenType.DESC) or (asc and False) 4538 4539 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4540 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4541 4542 nulls_first = is_nulls_first or False 4543 explicitly_null_ordered = is_nulls_first or is_nulls_last 4544 4545 if ( 4546 not explicitly_null_ordered 4547 and ( 4548 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4549 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4550 ) 4551 and self.dialect.NULL_ORDERING != "nulls_are_last" 4552 ): 4553 nulls_first = True 4554 4555 if self._match_text_seq("WITH", "FILL"): 4556 with_fill = self.expression( 4557 exp.WithFill, 4558 **{ # type: ignore 4559 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4560 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4561 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4562 "interpolate": self._parse_interpolate(), 4563 }, 4564 ) 4565 else: 4566 with_fill = None 4567 4568 return self.expression( 4569 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4570 ) 4571 4572 def _parse_limit_options(self) -> exp.LimitOptions: 4573 percent = self._match(TokenType.PERCENT) 4574 rows = 
self._match_set((TokenType.ROW, TokenType.ROWS)) 4575 self._match_text_seq("ONLY") 4576 with_ties = self._match_text_seq("WITH", "TIES") 4577 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4578 4579 def _parse_limit( 4580 self, 4581 this: t.Optional[exp.Expression] = None, 4582 top: bool = False, 4583 skip_limit_token: bool = False, 4584 ) -> t.Optional[exp.Expression]: 4585 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4586 comments = self._prev_comments 4587 if top: 4588 limit_paren = self._match(TokenType.L_PAREN) 4589 expression = self._parse_term() if limit_paren else self._parse_number() 4590 4591 if limit_paren: 4592 self._match_r_paren() 4593 4594 limit_options = self._parse_limit_options() 4595 else: 4596 limit_options = None 4597 expression = self._parse_term() 4598 4599 if self._match(TokenType.COMMA): 4600 offset = expression 4601 expression = self._parse_term() 4602 else: 4603 offset = None 4604 4605 limit_exp = self.expression( 4606 exp.Limit, 4607 this=this, 4608 expression=expression, 4609 offset=offset, 4610 comments=comments, 4611 limit_options=limit_options, 4612 expressions=self._parse_limit_by(), 4613 ) 4614 4615 return limit_exp 4616 4617 if self._match(TokenType.FETCH): 4618 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4619 direction = self._prev.text.upper() if direction else "FIRST" 4620 4621 count = self._parse_field(tokens=self.FETCH_TOKENS) 4622 4623 return self.expression( 4624 exp.Fetch, 4625 direction=direction, 4626 count=count, 4627 limit_options=self._parse_limit_options(), 4628 ) 4629 4630 return this 4631 4632 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4633 if not self._match(TokenType.OFFSET): 4634 return this 4635 4636 count = self._parse_term() 4637 self._match_set((TokenType.ROW, TokenType.ROWS)) 4638 4639 return self.expression( 4640 exp.Offset, this=this, expression=count, 
expressions=self._parse_limit_by() 4641 ) 4642 4643 def _can_parse_limit_or_offset(self) -> bool: 4644 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4645 return False 4646 4647 index = self._index 4648 result = bool( 4649 self._try_parse(self._parse_limit, retreat=True) 4650 or self._try_parse(self._parse_offset, retreat=True) 4651 ) 4652 self._retreat(index) 4653 return result 4654 4655 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4656 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4657 4658 def _parse_locks(self) -> t.List[exp.Lock]: 4659 locks = [] 4660 while True: 4661 if self._match_text_seq("FOR", "UPDATE"): 4662 update = True 4663 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4664 "LOCK", "IN", "SHARE", "MODE" 4665 ): 4666 update = False 4667 else: 4668 break 4669 4670 expressions = None 4671 if self._match_text_seq("OF"): 4672 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4673 4674 wait: t.Optional[bool | exp.Expression] = None 4675 if self._match_text_seq("NOWAIT"): 4676 wait = True 4677 elif self._match_text_seq("WAIT"): 4678 wait = self._parse_primary() 4679 elif self._match_text_seq("SKIP", "LOCKED"): 4680 wait = False 4681 4682 locks.append( 4683 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4684 ) 4685 4686 return locks 4687 4688 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4689 start = self._index 4690 _, side_token, kind_token = self._parse_join_parts() 4691 4692 side = side_token.text if side_token else None 4693 kind = kind_token.text if kind_token else None 4694 4695 if not self._match_set(self.SET_OPERATIONS): 4696 self._retreat(start) 4697 return None 4698 4699 token_type = self._prev.token_type 4700 4701 if token_type == TokenType.UNION: 4702 operation: t.Type[exp.SetOperation] = exp.Union 4703 elif token_type == TokenType.EXCEPT: 4704 operation = 
exp.Except 4705 else: 4706 operation = exp.Intersect 4707 4708 comments = self._prev.comments 4709 4710 if self._match(TokenType.DISTINCT): 4711 distinct: t.Optional[bool] = True 4712 elif self._match(TokenType.ALL): 4713 distinct = False 4714 else: 4715 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4716 if distinct is None: 4717 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4718 4719 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4720 "STRICT", "CORRESPONDING" 4721 ) 4722 if self._match_text_seq("CORRESPONDING"): 4723 by_name = True 4724 if not side and not kind: 4725 kind = "INNER" 4726 4727 on_column_list = None 4728 if by_name and self._match_texts(("ON", "BY")): 4729 on_column_list = self._parse_wrapped_csv(self._parse_column) 4730 4731 expression = self._parse_select(nested=True, parse_set_operation=False) 4732 4733 return self.expression( 4734 operation, 4735 comments=comments, 4736 this=this, 4737 distinct=distinct, 4738 by_name=by_name, 4739 expression=expression, 4740 side=side, 4741 kind=kind, 4742 on=on_column_list, 4743 ) 4744 4745 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4746 while this: 4747 setop = self.parse_set_operation(this) 4748 if not setop: 4749 break 4750 this = setop 4751 4752 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4753 expression = this.expression 4754 4755 if expression: 4756 for arg in self.SET_OP_MODIFIERS: 4757 expr = expression.args.get(arg) 4758 if expr: 4759 this.set(arg, expr.pop()) 4760 4761 return this 4762 4763 def _parse_expression(self) -> t.Optional[exp.Expression]: 4764 return self._parse_alias(self._parse_assignment()) 4765 4766 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4767 this = self._parse_disjunction() 4768 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4769 # This allows us to parse <non-identifier token> := <expr> 4770 
this = exp.column( 4771 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4772 ) 4773 4774 while self._match_set(self.ASSIGNMENT): 4775 if isinstance(this, exp.Column) and len(this.parts) == 1: 4776 this = this.this 4777 4778 this = self.expression( 4779 self.ASSIGNMENT[self._prev.token_type], 4780 this=this, 4781 comments=self._prev_comments, 4782 expression=self._parse_assignment(), 4783 ) 4784 4785 return this 4786 4787 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4788 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4789 4790 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4791 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4792 4793 def _parse_equality(self) -> t.Optional[exp.Expression]: 4794 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4795 4796 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4797 return self._parse_tokens(self._parse_range, self.COMPARISON) 4798 4799 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4800 this = this or self._parse_bitwise() 4801 negate = self._match(TokenType.NOT) 4802 4803 if self._match_set(self.RANGE_PARSERS): 4804 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4805 if not expression: 4806 return this 4807 4808 this = expression 4809 elif self._match(TokenType.ISNULL): 4810 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4811 4812 # Postgres supports ISNULL and NOTNULL for conditions. 
4813 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4814 if self._match(TokenType.NOTNULL): 4815 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4816 this = self.expression(exp.Not, this=this) 4817 4818 if negate: 4819 this = self._negate_range(this) 4820 4821 if self._match(TokenType.IS): 4822 this = self._parse_is(this) 4823 4824 return this 4825 4826 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4827 if not this: 4828 return this 4829 4830 return self.expression(exp.Not, this=this) 4831 4832 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4833 index = self._index - 1 4834 negate = self._match(TokenType.NOT) 4835 4836 if self._match_text_seq("DISTINCT", "FROM"): 4837 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4838 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4839 4840 if self._match(TokenType.JSON): 4841 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4842 4843 if self._match_text_seq("WITH"): 4844 _with = True 4845 elif self._match_text_seq("WITHOUT"): 4846 _with = False 4847 else: 4848 _with = None 4849 4850 unique = self._match(TokenType.UNIQUE) 4851 self._match_text_seq("KEYS") 4852 expression: t.Optional[exp.Expression] = self.expression( 4853 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4854 ) 4855 else: 4856 expression = self._parse_primary() or self._parse_null() 4857 if not expression: 4858 self._retreat(index) 4859 return None 4860 4861 this = self.expression(exp.Is, this=this, expression=expression) 4862 return self.expression(exp.Not, this=this) if negate else this 4863 4864 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4865 unnest = self._parse_unnest(with_alias=False) 4866 if unnest: 4867 this = self.expression(exp.In, this=this, unnest=unnest) 4868 elif self._match_set((TokenType.L_PAREN, 
TokenType.L_BRACKET)): 4869 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4870 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4871 4872 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4873 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4874 else: 4875 this = self.expression(exp.In, this=this, expressions=expressions) 4876 4877 if matched_l_paren: 4878 self._match_r_paren(this) 4879 elif not self._match(TokenType.R_BRACKET, expression=this): 4880 self.raise_error("Expecting ]") 4881 else: 4882 this = self.expression(exp.In, this=this, field=self._parse_column()) 4883 4884 return this 4885 4886 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4887 low = self._parse_bitwise() 4888 self._match(TokenType.AND) 4889 high = self._parse_bitwise() 4890 return self.expression(exp.Between, this=this, low=low, high=high) 4891 4892 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4893 if not self._match(TokenType.ESCAPE): 4894 return this 4895 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4896 4897 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4898 index = self._index 4899 4900 if not self._match(TokenType.INTERVAL) and match_interval: 4901 return None 4902 4903 if self._match(TokenType.STRING, advance=False): 4904 this = self._parse_primary() 4905 else: 4906 this = self._parse_term() 4907 4908 if not this or ( 4909 isinstance(this, exp.Column) 4910 and not this.table 4911 and not this.this.quoted 4912 and this.name.upper() == "IS" 4913 ): 4914 self._retreat(index) 4915 return None 4916 4917 unit = self._parse_function() or ( 4918 not self._match(TokenType.ALIAS, advance=False) 4919 and self._parse_var(any_token=True, upper=True) 4920 ) 4921 4922 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 
4923 # each INTERVAL expression into this canonical form so it's easy to transpile 4924 if this and this.is_number: 4925 this = exp.Literal.string(this.to_py()) 4926 elif this and this.is_string: 4927 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4928 if parts and unit: 4929 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4930 unit = None 4931 self._retreat(self._index - 1) 4932 4933 if len(parts) == 1: 4934 this = exp.Literal.string(parts[0][0]) 4935 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4936 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4937 unit = self.expression( 4938 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4939 ) 4940 4941 interval = self.expression(exp.Interval, this=this, unit=unit) 4942 4943 index = self._index 4944 self._match(TokenType.PLUS) 4945 4946 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4947 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4948 return self.expression( 4949 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4950 ) 4951 4952 self._retreat(index) 4953 return interval 4954 4955 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4956 this = self._parse_term() 4957 4958 while True: 4959 if self._match_set(self.BITWISE): 4960 this = self.expression( 4961 self.BITWISE[self._prev.token_type], 4962 this=this, 4963 expression=self._parse_term(), 4964 ) 4965 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4966 this = self.expression( 4967 exp.DPipe, 4968 this=this, 4969 expression=self._parse_term(), 4970 safe=not self.dialect.STRICT_STRING_CONCAT, 4971 ) 4972 elif self._match(TokenType.DQMARK): 4973 this = self.expression( 4974 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4975 ) 4976 elif self._match_pair(TokenType.LT, TokenType.LT): 4977 this = self.expression( 4978 exp.BitwiseLeftShift, 
this=this, expression=self._parse_term() 4979 ) 4980 elif self._match_pair(TokenType.GT, TokenType.GT): 4981 this = self.expression( 4982 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4983 ) 4984 else: 4985 break 4986 4987 return this 4988 4989 def _parse_term(self) -> t.Optional[exp.Expression]: 4990 this = self._parse_factor() 4991 4992 while self._match_set(self.TERM): 4993 klass = self.TERM[self._prev.token_type] 4994 comments = self._prev_comments 4995 expression = self._parse_factor() 4996 4997 this = self.expression(klass, this=this, comments=comments, expression=expression) 4998 4999 if isinstance(this, exp.Collate): 5000 expr = this.expression 5001 5002 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5003 # fallback to Identifier / Var 5004 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5005 ident = expr.this 5006 if isinstance(ident, exp.Identifier): 5007 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5008 5009 return this 5010 5011 def _parse_factor(self) -> t.Optional[exp.Expression]: 5012 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5013 this = parse_method() 5014 5015 while self._match_set(self.FACTOR): 5016 klass = self.FACTOR[self._prev.token_type] 5017 comments = self._prev_comments 5018 expression = parse_method() 5019 5020 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5021 self._retreat(self._index - 1) 5022 return this 5023 5024 this = self.expression(klass, this=this, comments=comments, expression=expression) 5025 5026 if isinstance(this, exp.Div): 5027 this.args["typed"] = self.dialect.TYPED_DIVISION 5028 this.args["safe"] = self.dialect.SAFE_DIVISION 5029 5030 return this 5031 5032 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5033 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5034 5035 def _parse_unary(self) -> t.Optional[exp.Expression]: 5036 if 
self._match_set(self.UNARY_PARSERS): 5037 return self.UNARY_PARSERS[self._prev.token_type](self) 5038 return self._parse_at_time_zone(self._parse_type()) 5039 5040 def _parse_type( 5041 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5042 ) -> t.Optional[exp.Expression]: 5043 interval = parse_interval and self._parse_interval() 5044 if interval: 5045 return interval 5046 5047 index = self._index 5048 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5049 5050 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5051 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5052 if isinstance(data_type, exp.Cast): 5053 # This constructor can contain ops directly after it, for instance struct unnesting: 5054 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5055 return self._parse_column_ops(data_type) 5056 5057 if data_type: 5058 index2 = self._index 5059 this = self._parse_primary() 5060 5061 if isinstance(this, exp.Literal): 5062 this = self._parse_column_ops(this) 5063 5064 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5065 if parser: 5066 return parser(self, this, data_type) 5067 5068 return self.expression(exp.Cast, this=this, to=data_type) 5069 5070 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5071 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5072 # 5073 # If the index difference here is greater than 1, that means the parser itself must have 5074 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5075 # 5076 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5077 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5078 # callable in the TYPE_CONVERTERS mapping. 
For example, Snowflake converts DECIMAL to 5079 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5080 # 5081 # In these cases, we don't really want to return the converted type, but instead retreat 5082 # and try to parse a Column or Identifier in the section below. 5083 if data_type.expressions and index2 - index > 1: 5084 self._retreat(index2) 5085 return self._parse_column_ops(data_type) 5086 5087 self._retreat(index) 5088 5089 if fallback_to_identifier: 5090 return self._parse_id_var() 5091 5092 this = self._parse_column() 5093 return this and self._parse_column_ops(this) 5094 5095 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5096 this = self._parse_type() 5097 if not this: 5098 return None 5099 5100 if isinstance(this, exp.Column) and not this.table: 5101 this = exp.var(this.name.upper()) 5102 5103 return self.expression( 5104 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5105 ) 5106 5107 def _parse_types( 5108 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5109 ) -> t.Optional[exp.Expression]: 5110 index = self._index 5111 5112 this: t.Optional[exp.Expression] = None 5113 prefix = self._match_text_seq("SYSUDTLIB", ".") 5114 5115 if not self._match_set(self.TYPE_TOKENS): 5116 identifier = allow_identifiers and self._parse_id_var( 5117 any_token=False, tokens=(TokenType.VAR,) 5118 ) 5119 if isinstance(identifier, exp.Identifier): 5120 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5121 5122 if len(tokens) != 1: 5123 self.raise_error("Unexpected identifier", self._prev) 5124 5125 if tokens[0].token_type in self.TYPE_TOKENS: 5126 self._prev = tokens[0] 5127 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5128 type_name = identifier.name 5129 5130 while self._match(TokenType.DOT): 5131 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5132 5133 this = exp.DataType.build(type_name, udt=True) 5134 else: 5135 
self._retreat(self._index - 1) 5136 return None 5137 else: 5138 return None 5139 5140 type_token = self._prev.token_type 5141 5142 if type_token == TokenType.PSEUDO_TYPE: 5143 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5144 5145 if type_token == TokenType.OBJECT_IDENTIFIER: 5146 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5147 5148 # https://materialize.com/docs/sql/types/map/ 5149 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5150 key_type = self._parse_types( 5151 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5152 ) 5153 if not self._match(TokenType.FARROW): 5154 self._retreat(index) 5155 return None 5156 5157 value_type = self._parse_types( 5158 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5159 ) 5160 if not self._match(TokenType.R_BRACKET): 5161 self._retreat(index) 5162 return None 5163 5164 return exp.DataType( 5165 this=exp.DataType.Type.MAP, 5166 expressions=[key_type, value_type], 5167 nested=True, 5168 prefix=prefix, 5169 ) 5170 5171 nested = type_token in self.NESTED_TYPE_TOKENS 5172 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5173 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5174 expressions = None 5175 maybe_func = False 5176 5177 if self._match(TokenType.L_PAREN): 5178 if is_struct: 5179 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5180 elif nested: 5181 expressions = self._parse_csv( 5182 lambda: self._parse_types( 5183 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5184 ) 5185 ) 5186 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5187 this = expressions[0] 5188 this.set("nullable", True) 5189 self._match_r_paren() 5190 return this 5191 elif type_token in self.ENUM_TYPE_TOKENS: 5192 expressions = self._parse_csv(self._parse_equality) 5193 elif is_aggregate: 5194 func_or_ident = self._parse_function(anonymous=True) or 
self._parse_id_var( 5195 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5196 ) 5197 if not func_or_ident: 5198 return None 5199 expressions = [func_or_ident] 5200 if self._match(TokenType.COMMA): 5201 expressions.extend( 5202 self._parse_csv( 5203 lambda: self._parse_types( 5204 check_func=check_func, 5205 schema=schema, 5206 allow_identifiers=allow_identifiers, 5207 ) 5208 ) 5209 ) 5210 else: 5211 expressions = self._parse_csv(self._parse_type_size) 5212 5213 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5214 if type_token == TokenType.VECTOR and len(expressions) == 2: 5215 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5216 5217 if not expressions or not self._match(TokenType.R_PAREN): 5218 self._retreat(index) 5219 return None 5220 5221 maybe_func = True 5222 5223 values: t.Optional[t.List[exp.Expression]] = None 5224 5225 if nested and self._match(TokenType.LT): 5226 if is_struct: 5227 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5228 else: 5229 expressions = self._parse_csv( 5230 lambda: self._parse_types( 5231 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5232 ) 5233 ) 5234 5235 if not self._match(TokenType.GT): 5236 self.raise_error("Expecting >") 5237 5238 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5239 values = self._parse_csv(self._parse_assignment) 5240 if not values and is_struct: 5241 values = None 5242 self._retreat(self._index - 1) 5243 else: 5244 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5245 5246 if type_token in self.TIMESTAMPS: 5247 if self._match_text_seq("WITH", "TIME", "ZONE"): 5248 maybe_func = False 5249 tz_type = ( 5250 exp.DataType.Type.TIMETZ 5251 if type_token in self.TIMES 5252 else exp.DataType.Type.TIMESTAMPTZ 5253 ) 5254 this = exp.DataType(this=tz_type, expressions=expressions) 5255 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5256 maybe_func = False 5257 this 
= exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5258 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5259 maybe_func = False 5260 elif type_token == TokenType.INTERVAL: 5261 unit = self._parse_var(upper=True) 5262 if unit: 5263 if self._match_text_seq("TO"): 5264 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5265 5266 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5267 else: 5268 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5269 elif type_token == TokenType.VOID: 5270 this = exp.DataType(this=exp.DataType.Type.NULL) 5271 5272 if maybe_func and check_func: 5273 index2 = self._index 5274 peek = self._parse_string() 5275 5276 if not peek: 5277 self._retreat(index) 5278 return None 5279 5280 self._retreat(index2) 5281 5282 if not this: 5283 if self._match_text_seq("UNSIGNED"): 5284 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5285 if not unsigned_type_token: 5286 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5287 5288 type_token = unsigned_type_token or type_token 5289 5290 this = exp.DataType( 5291 this=exp.DataType.Type[type_token.value], 5292 expressions=expressions, 5293 nested=nested, 5294 prefix=prefix, 5295 ) 5296 5297 # Empty arrays/structs are allowed 5298 if values is not None: 5299 cls = exp.Struct if is_struct else exp.Array 5300 this = exp.cast(cls(expressions=values), this, copy=False) 5301 5302 elif expressions: 5303 this.set("expressions", expressions) 5304 5305 # https://materialize.com/docs/sql/types/list/#type-name 5306 while self._match(TokenType.LIST): 5307 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5308 5309 index = self._index 5310 5311 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5312 matched_array = self._match(TokenType.ARRAY) 5313 5314 while self._curr: 5315 datatype_token = self._prev.token_type 5316 
matched_l_bracket = self._match(TokenType.L_BRACKET) 5317 5318 if (not matched_l_bracket and not matched_array) or ( 5319 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5320 ): 5321 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5322 # not to be confused with the fixed size array parsing 5323 break 5324 5325 matched_array = False 5326 values = self._parse_csv(self._parse_assignment) or None 5327 if ( 5328 values 5329 and not schema 5330 and ( 5331 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5332 ) 5333 ): 5334 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5335 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5336 self._retreat(index) 5337 break 5338 5339 this = exp.DataType( 5340 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5341 ) 5342 self._match(TokenType.R_BRACKET) 5343 5344 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5345 converter = self.TYPE_CONVERTERS.get(this.this) 5346 if converter: 5347 this = converter(t.cast(exp.DataType, this)) 5348 5349 return this 5350 5351 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5352 index = self._index 5353 5354 if ( 5355 self._curr 5356 and self._next 5357 and self._curr.token_type in self.TYPE_TOKENS 5358 and self._next.token_type in self.TYPE_TOKENS 5359 ): 5360 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5361 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5362 this = self._parse_id_var() 5363 else: 5364 this = ( 5365 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5366 or self._parse_id_var() 5367 ) 5368 5369 self._match(TokenType.COLON) 5370 5371 if ( 5372 type_required 5373 and not isinstance(this, exp.DataType) 5374 and not self._match_set(self.TYPE_TOKENS, advance=False) 5375 ): 5376 self._retreat(index) 5377 return self._parse_types() 5378 5379 return self._parse_column_def(this) 5380 5381 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5382 if not self._match_text_seq("AT", "TIME", "ZONE"): 5383 return this 5384 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5385 5386 def _parse_column(self) -> t.Optional[exp.Expression]: 5387 this = self._parse_column_reference() 5388 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5389 5390 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5391 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5392 5393 return column 5394 5395 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5396 this = self._parse_field() 5397 if ( 5398 not this 5399 and self._match(TokenType.VALUES, advance=False) 5400 and self.VALUES_FOLLOWED_BY_PAREN 5401 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5402 ): 5403 this = self._parse_id_var() 5404 5405 if isinstance(this, exp.Identifier): 5406 # We bubble up comments from the Identifier to the Column 5407 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5408 5409 return this 5410 5411 def _parse_colon_as_variant_extract( 5412 self, this: t.Optional[exp.Expression] 5413 ) -> t.Optional[exp.Expression]: 5414 casts = [] 5415 json_path = [] 5416 escape = None 5417 5418 while self._match(TokenType.COLON): 5419 start_index = self._index 5420 5421 # Snowflake allows reserved keywords as 
json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5422 path = self._parse_column_ops( 5423 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5424 ) 5425 5426 # The cast :: operator has a lower precedence than the extraction operator :, so 5427 # we rearrange the AST appropriately to avoid casting the JSON path 5428 while isinstance(path, exp.Cast): 5429 casts.append(path.to) 5430 path = path.this 5431 5432 if casts: 5433 dcolon_offset = next( 5434 i 5435 for i, t in enumerate(self._tokens[start_index:]) 5436 if t.token_type == TokenType.DCOLON 5437 ) 5438 end_token = self._tokens[start_index + dcolon_offset - 1] 5439 else: 5440 end_token = self._prev 5441 5442 if path: 5443 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5444 # it'll roundtrip to a string literal in GET_PATH 5445 if isinstance(path, exp.Identifier) and path.quoted: 5446 escape = True 5447 5448 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5449 5450 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5451 # Databricks transforms it back to the colon/dot notation 5452 if json_path: 5453 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5454 5455 if json_path_expr: 5456 json_path_expr.set("escape", escape) 5457 5458 this = self.expression( 5459 exp.JSONExtract, 5460 this=this, 5461 expression=json_path_expr, 5462 variant_extract=True, 5463 ) 5464 5465 while casts: 5466 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5467 5468 return this 5469 5470 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5471 return self._parse_types() 5472 5473 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5474 this = self._parse_bracket(this) 5475 5476 while self._match_set(self.COLUMN_OPERATORS): 5477 op_token = self._prev.token_type 5478 op = 
self.COLUMN_OPERATORS.get(op_token) 5479 5480 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5481 field = self._parse_dcolon() 5482 if not field: 5483 self.raise_error("Expected type") 5484 elif op and self._curr: 5485 field = self._parse_column_reference() or self._parse_bracket() 5486 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5487 field = self._parse_column_ops(field) 5488 else: 5489 field = self._parse_field(any_token=True, anonymous_func=True) 5490 5491 if isinstance(field, (exp.Func, exp.Window)) and this: 5492 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5493 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5494 this = exp.replace_tree( 5495 this, 5496 lambda n: ( 5497 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5498 if n.table 5499 else n.this 5500 ) 5501 if isinstance(n, exp.Column) 5502 else n, 5503 ) 5504 5505 if op: 5506 this = op(self, this, field) 5507 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5508 this = self.expression( 5509 exp.Column, 5510 comments=this.comments, 5511 this=field, 5512 table=this.this, 5513 db=this.args.get("table"), 5514 catalog=this.args.get("db"), 5515 ) 5516 elif isinstance(field, exp.Window): 5517 # Move the exp.Dot's to the window's function 5518 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5519 field.set("this", window_func) 5520 this = field 5521 else: 5522 this = self.expression(exp.Dot, this=this, expression=field) 5523 5524 if field and field.comments: 5525 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5526 5527 this = self._parse_bracket(this) 5528 5529 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5530 5531 def _parse_primary(self) -> t.Optional[exp.Expression]: 5532 if self._match_set(self.PRIMARY_PARSERS): 5533 token_type = 
self._prev.token_type 5534 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5535 5536 if token_type == TokenType.STRING: 5537 expressions = [primary] 5538 while self._match(TokenType.STRING): 5539 expressions.append(exp.Literal.string(self._prev.text)) 5540 5541 if len(expressions) > 1: 5542 return self.expression(exp.Concat, expressions=expressions) 5543 5544 return primary 5545 5546 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5547 return exp.Literal.number(f"0.{self._prev.text}") 5548 5549 if self._match(TokenType.L_PAREN): 5550 comments = self._prev_comments 5551 query = self._parse_select() 5552 5553 if query: 5554 expressions = [query] 5555 else: 5556 expressions = self._parse_expressions() 5557 5558 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5559 5560 if not this and self._match(TokenType.R_PAREN, advance=False): 5561 this = self.expression(exp.Tuple) 5562 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5563 this = self._parse_subquery(this=this, parse_alias=False) 5564 elif isinstance(this, exp.Subquery): 5565 this = self._parse_subquery( 5566 this=self._parse_set_operations(this), parse_alias=False 5567 ) 5568 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5569 this = self.expression(exp.Tuple, expressions=expressions) 5570 else: 5571 this = self.expression(exp.Paren, this=this) 5572 5573 if this: 5574 this.add_comments(comments) 5575 5576 self._match_r_paren(expression=this) 5577 return this 5578 5579 return None 5580 5581 def _parse_field( 5582 self, 5583 any_token: bool = False, 5584 tokens: t.Optional[t.Collection[TokenType]] = None, 5585 anonymous_func: bool = False, 5586 ) -> t.Optional[exp.Expression]: 5587 if anonymous_func: 5588 field = ( 5589 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5590 or self._parse_primary() 5591 ) 5592 else: 5593 field = self._parse_primary() or self._parse_function( 5594 anonymous=anonymous_func, any_token=any_token 5595 ) 5596 return 
field or self._parse_id_var(any_token=any_token, tokens=tokens) 5597 5598 def _parse_function( 5599 self, 5600 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5601 anonymous: bool = False, 5602 optional_parens: bool = True, 5603 any_token: bool = False, 5604 ) -> t.Optional[exp.Expression]: 5605 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5606 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5607 fn_syntax = False 5608 if ( 5609 self._match(TokenType.L_BRACE, advance=False) 5610 and self._next 5611 and self._next.text.upper() == "FN" 5612 ): 5613 self._advance(2) 5614 fn_syntax = True 5615 5616 func = self._parse_function_call( 5617 functions=functions, 5618 anonymous=anonymous, 5619 optional_parens=optional_parens, 5620 any_token=any_token, 5621 ) 5622 5623 if fn_syntax: 5624 self._match(TokenType.R_BRACE) 5625 5626 return func 5627 5628 def _parse_function_call( 5629 self, 5630 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5631 anonymous: bool = False, 5632 optional_parens: bool = True, 5633 any_token: bool = False, 5634 ) -> t.Optional[exp.Expression]: 5635 if not self._curr: 5636 return None 5637 5638 comments = self._curr.comments 5639 token = self._curr 5640 token_type = self._curr.token_type 5641 this = self._curr.text 5642 upper = this.upper() 5643 5644 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5645 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5646 self._advance() 5647 return self._parse_window(parser(self)) 5648 5649 if not self._next or self._next.token_type != TokenType.L_PAREN: 5650 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5651 self._advance() 5652 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5653 5654 return None 5655 5656 if any_token: 5657 if token_type in self.RESERVED_TOKENS: 5658 return None 5659 elif token_type not in self.FUNC_TOKENS: 5660 return None 5661 5662 self._advance(2) 5663 
5664 parser = self.FUNCTION_PARSERS.get(upper) 5665 if parser and not anonymous: 5666 this = parser(self) 5667 else: 5668 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5669 5670 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5671 this = self.expression( 5672 subquery_predicate, comments=comments, this=self._parse_select() 5673 ) 5674 self._match_r_paren() 5675 return this 5676 5677 if functions is None: 5678 functions = self.FUNCTIONS 5679 5680 function = functions.get(upper) 5681 known_function = function and not anonymous 5682 5683 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5684 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5685 5686 post_func_comments = self._curr and self._curr.comments 5687 if known_function and post_func_comments: 5688 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5689 # call we'll construct it as exp.Anonymous, even if it's "known" 5690 if any( 5691 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5692 for comment in post_func_comments 5693 ): 5694 known_function = False 5695 5696 if alias and known_function: 5697 args = self._kv_to_prop_eq(args) 5698 5699 if known_function: 5700 func_builder = t.cast(t.Callable, function) 5701 5702 if "dialect" in func_builder.__code__.co_varnames: 5703 func = func_builder(args, dialect=self.dialect) 5704 else: 5705 func = func_builder(args) 5706 5707 func = self.validate_expression(func, args) 5708 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5709 func.meta["name"] = this 5710 5711 this = func 5712 else: 5713 if token_type == TokenType.IDENTIFIER: 5714 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5715 5716 this = self.expression(exp.Anonymous, this=this, expressions=args) 5717 this = this.update_positions(token) 5718 5719 if isinstance(this, exp.Expression): 5720 this.add_comments(comments) 5721 5722 self._match_r_paren(this) 5723 return 
self._parse_window(this) 5724 5725 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5726 return expression 5727 5728 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5729 transformed = [] 5730 5731 for index, e in enumerate(expressions): 5732 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5733 if isinstance(e, exp.Alias): 5734 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5735 5736 if not isinstance(e, exp.PropertyEQ): 5737 e = self.expression( 5738 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5739 ) 5740 5741 if isinstance(e.this, exp.Column): 5742 e.this.replace(e.this.this) 5743 else: 5744 e = self._to_prop_eq(e, index) 5745 5746 transformed.append(e) 5747 5748 return transformed 5749 5750 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5751 return self._parse_statement() 5752 5753 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5754 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5755 5756 def _parse_user_defined_function( 5757 self, kind: t.Optional[TokenType] = None 5758 ) -> t.Optional[exp.Expression]: 5759 this = self._parse_table_parts(schema=True) 5760 5761 if not self._match(TokenType.L_PAREN): 5762 return this 5763 5764 expressions = self._parse_csv(self._parse_function_parameter) 5765 self._match_r_paren() 5766 return self.expression( 5767 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5768 ) 5769 5770 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5771 literal = self._parse_primary() 5772 if literal: 5773 return self.expression(exp.Introducer, this=token.text, expression=literal) 5774 5775 return self._identifier_expression(token) 5776 5777 def _parse_session_parameter(self) -> exp.SessionParameter: 5778 kind = None 5779 this = self._parse_id_var() or self._parse_primary() 
5780 5781 if this and self._match(TokenType.DOT): 5782 kind = this.name 5783 this = self._parse_var() or self._parse_primary() 5784 5785 return self.expression(exp.SessionParameter, this=this, kind=kind) 5786 5787 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5788 return self._parse_id_var() 5789 5790 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5791 index = self._index 5792 5793 if self._match(TokenType.L_PAREN): 5794 expressions = t.cast( 5795 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5796 ) 5797 5798 if not self._match(TokenType.R_PAREN): 5799 self._retreat(index) 5800 else: 5801 expressions = [self._parse_lambda_arg()] 5802 5803 if self._match_set(self.LAMBDAS): 5804 return self.LAMBDAS[self._prev.token_type](self, expressions) 5805 5806 self._retreat(index) 5807 5808 this: t.Optional[exp.Expression] 5809 5810 if self._match(TokenType.DISTINCT): 5811 this = self.expression( 5812 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5813 ) 5814 else: 5815 this = self._parse_select_or_expression(alias=alias) 5816 5817 return self._parse_limit( 5818 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5819 ) 5820 5821 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5822 index = self._index 5823 if not self._match(TokenType.L_PAREN): 5824 return this 5825 5826 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5827 # expr can be of both types 5828 if self._match_set(self.SELECT_START_TOKENS): 5829 self._retreat(index) 5830 return this 5831 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5832 self._match_r_paren() 5833 return self.expression(exp.Schema, this=this, expressions=args) 5834 5835 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5836 return self._parse_column_def(self._parse_field(any_token=True)) 5837 5838 def _parse_column_def( 5839 self, this: t.Optional[exp.Expression], computed_column: bool = True 5840 ) -> t.Optional[exp.Expression]: 5841 # column defs are not really columns, they're identifiers 5842 if isinstance(this, exp.Column): 5843 this = this.this 5844 5845 if not computed_column: 5846 self._match(TokenType.ALIAS) 5847 5848 kind = self._parse_types(schema=True) 5849 5850 if self._match_text_seq("FOR", "ORDINALITY"): 5851 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5852 5853 constraints: t.List[exp.Expression] = [] 5854 5855 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5856 ("ALIAS", "MATERIALIZED") 5857 ): 5858 persisted = self._prev.text.upper() == "MATERIALIZED" 5859 constraint_kind = exp.ComputedColumnConstraint( 5860 this=self._parse_assignment(), 5861 persisted=persisted or self._match_text_seq("PERSISTED"), 5862 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5863 ) 5864 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5865 elif ( 5866 kind 5867 and self._match(TokenType.ALIAS, advance=False) 5868 and ( 5869 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5870 or (self._next and self._next.token_type == TokenType.L_PAREN) 5871 ) 5872 ): 5873 self._advance() 5874 constraints.append( 5875 self.expression( 5876 exp.ColumnConstraint, 5877 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5878 ) 5879 ) 5880 5881 while True: 5882 constraint = self._parse_column_constraint() 
def _parse_auto_increment(
    self,
) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
    """Parse the optional start/increment arguments of an auto-increment constraint.

    Two spellings are recognized: a parenthesized list `(<start>, <increment>)`
    and the keyword form `START <start> [INCREMENT] <increment>`.

    Returns:
        A `GeneratedAsIdentityColumnConstraint` (with `this=False`) when both a
        start and an increment were parsed, otherwise a bare
        `AutoIncrementColumnConstraint`.
    """
    seed = step = None

    if self._match(TokenType.L_PAREN, advance=False):
        # The paren is only peeked at; `_parse_wrapped_csv` consumes it
        bounds = self._parse_wrapped_csv(self._parse_bitwise)
        seed, step = seq_get(bounds, 0), seq_get(bounds, 1)
    elif self._match_text_seq("START"):
        seed = self._parse_bitwise()
        # The INCREMENT keyword itself is optional
        self._match_text_seq("INCREMENT")
        step = self._parse_bitwise()

    if not (seed and step):
        return exp.AutoIncrementColumnConstraint()

    return exp.GeneratedAsIdentityColumnConstraint(start=seed, increment=step, this=False)
def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
    """Parse an inline-length constraint: an optional `LENGTH` keyword followed by a value.

    Returns:
        An `InlineLengthColumnConstraint` wrapping the parsed bitwise expression.
    """
    # LENGTH is optional, so the result of the match is deliberately ignored
    self._match_text_seq("LENGTH")
    length = self._parse_bitwise()
    return self.expression(exp.InlineLengthColumnConstraint, this=length)
def _parse_unnamed_constraint(
    self, constraints: t.Optional[t.Collection[str]] = None
) -> t.Optional[exp.Expression]:
    """Parse a constraint that is introduced directly by its keyword.

    Args:
        constraints: The keyword texts to accept. When empty or omitted, every
            key of `CONSTRAINT_PARSERS` is accepted.

    Returns:
        The result of the matching entry in `CONSTRAINT_PARSERS`, or None when
        the current token is an identifier or matches none of the keywords.
    """
    # An identifier token is never treated as a constraint keyword
    if self._match(TokenType.IDENTIFIER, advance=False):
        return None

    accepted = constraints or self.CONSTRAINT_PARSERS
    if not self._match_texts(accepted):
        return None

    keyword = self._prev.text.upper()
    parsers = self.CONSTRAINT_PARSERS

    # A caller-supplied keyword may have no registered parser
    if keyword not in parsers:
        self.raise_error(f"No parser found for schema constraint {keyword}.")

    return parsers[keyword](self)
def _parse_key_constraint_options(self) -> t.List[str]:
    """Collect the options that trail a key constraint as plain strings.

    Each iteration consumes either an `ON <event> <action>` clause, where the
    action is one of NO ACTION, CASCADE, RESTRICT, SET NULL or SET DEFAULT, or
    a single option from `KEY_CONSTRAINT_OPTIONS`. Parsing stops at the end of
    the token stream or at the first token that is neither.

    Returns:
        The option strings in the order they were parsed.
    """
    collected: t.List[str] = []

    while self._curr:
        if not self._match(TokenType.ON):
            option = self._parse_var_from_options(
                self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
            )
            if not option:
                break

            collected.append(option.name)
            continue

        # Whatever token follows ON is taken verbatim as the triggering event
        event = self._advance_any() and self._prev.text
        action = None

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match_text_seq("CASCADE"):
            action = "CASCADE"
        elif self._match_text_seq("RESTRICT"):
            action = "RESTRICT"
        elif self._match_pair(TokenType.SET, TokenType.NULL):
            action = "SET NULL"
        elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
            action = "SET DEFAULT"
        else:
            self.raise_error("Invalid key constraint")

        collected.append(f"ON {event} {action}")

    return collected
def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
    """Parse the wrapped identifier pair of a PERIOD FOR SYSTEM_TIME constraint.

    Returns:
        A `PeriodForSystemTimeConstraint` built from the first two wrapped
        identifiers, or None (after stepping back one token) when the current
        token is not `TIMESTAMP_SNAPSHOT`.
    """
    if self._match(TokenType.TIMESTAMP_SNAPSHOT):
        columns = self._parse_wrapped_id_vars()
        first = seq_get(columns, 0)
        second = seq_get(columns, 1)
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=first,
            expression=second,
        )

    # Not this constraint: give back the token consumed before we were called
    self._retreat(self._index - 1)
    return None
options=options) 6175 6176 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6177 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6178 6179 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6180 """ 6181 Parses a datetime column in ODBC format. We parse the column into the corresponding 6182 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6183 same as we did for `DATE('yyyy-mm-dd')`. 6184 6185 Reference: 6186 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6187 """ 6188 self._match(TokenType.VAR) 6189 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6190 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6191 if not self._match(TokenType.R_BRACE): 6192 self.raise_error("Expected }") 6193 return expression 6194 6195 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6196 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6197 return this 6198 6199 bracket_kind = self._prev.token_type 6200 if ( 6201 bracket_kind == TokenType.L_BRACE 6202 and self._curr 6203 and self._curr.token_type == TokenType.VAR 6204 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6205 ): 6206 return self._parse_odbc_datetime_literal() 6207 6208 expressions = self._parse_csv( 6209 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6210 ) 6211 6212 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6213 self.raise_error("Expected ]") 6214 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6215 self.raise_error("Expected }") 6216 6217 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6218 if bracket_kind == TokenType.L_BRACE: 6219 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6220 
def _parse_case(self) -> t.Optional[exp.Expression]:
    """Parse the body of a CASE expression, after the CASE keyword itself.

    Grammar handled: `[<operand>] {WHEN <cond> [THEN] <result>}* [ELSE <default>] END`.

    Returns:
        An `exp.Case` holding the operand, one `exp.If` per WHEN branch and the
        optional default, with the comments of the previous token attached.
    """
    case_comments = self._prev_comments
    operand = self._parse_assignment()

    branches = []
    while self._match(TokenType.WHEN):
        condition = self._parse_assignment()
        self._match(TokenType.THEN)
        outcome = self._parse_assignment()
        branches.append(self.expression(exp.If, this=condition, true=outcome))

    fallback = self._parse_assignment() if self._match(TokenType.ELSE) else None

    if not self._match(TokenType.END):
        # `ELSE interval END` gets parsed as an Interval whose operand is END, which
        # swallows the terminator; in that case the default is a column named "interval"
        swallowed_end = (
            isinstance(fallback, exp.Interval) and fallback.this.sql().upper() == "END"
        )
        if swallowed_end:
            fallback = exp.column("interval")
        else:
            self.raise_error("Expected END after CASE", self._prev)

    return self.expression(
        exp.Case, comments=case_comments, this=operand, ifs=branches, default=fallback
    )
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """Parse the arguments of EXTRACT: `<part> FROM <expr>` or `<part>, <expr>`.

    The part is parsed as a function when possible, otherwise as an upper-cased
    var or string. An error is raised when neither FROM nor a comma follows it.

    Returns:
        An `exp.Extract` whose `this` is the part and whose `expression` is the
        bitwise expression parsed after the separator.
    """
    part = self._parse_function() or self._parse_var_or_string(upper=True)

    # Both separators lead to the same node, so they share a single code path
    if not (self._match(TokenType.FROM) or self._match(TokenType.COMMA)):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    source = self._parse_bitwise()
    return self.expression(exp.Extract, this=part, expression=source)
def _parse_string_agg(self) -> exp.GroupConcat:
    """Parse the arguments of a STRING_AGG / LISTAGG style call into a `GroupConcat`.

    Handles an optional leading DISTINCT, an optional `ON OVERFLOW` clause, an
    inline `ORDER BY ... [LIMIT n]` before the closing paren, and a trailing
    `WITHIN GROUP (ORDER BY ...)`.

    Returns:
        The `exp.GroupConcat` node. Any ordering is stored on `this`, and
        `on_overflow` is only set on the WITHIN GROUP path.
    """
    args: t.List[t.Optional[exp.Expression]]
    if not self._match(TokenType.DISTINCT):
        args = self._parse_csv(self._parse_assignment)  # type: ignore
    else:
        distinct = self.expression(exp.Distinct, expressions=[self._parse_assignment()])
        args = [distinct]
        if self._match(TokenType.COMMA):
            args.extend(self._parse_csv(self._parse_assignment))

    on_overflow: t.Optional[exp.Expression] = None
    if self._match_text_seq("ON", "OVERFLOW"):
        # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
        if self._match_text_seq("ERROR"):
            on_overflow = exp.var("ERROR")
        else:
            self._match_text_seq("TRUNCATE")
            filler = self._parse_string()
            # The count is included unless WITHOUT COUNT is spelled out
            with_count = self._match_text_seq("WITH", "COUNT") or not self._match_text_seq(
                "WITHOUT", "COUNT"
            )
            on_overflow = self.expression(
                exp.OverflowTruncateBehavior, this=filler, with_count=with_count
            )

    checkpoint = self._index
    closed = self._match(TokenType.R_PAREN)

    if not closed and args:
        # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
        # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
        # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
        ordered = self._parse_order(this=args[0])
        args[0] = self._parse_limit(this=ordered)
        return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

    # WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]) is handled "manually" here,
    # rather than by _parse_window as an exp.WithinGroup node, so that the call is parsed like in
    # MySQL / SQLite and can thus be transpiled more easily to them.
    if self._match_text_seq("WITHIN", "GROUP"):
        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    self._retreat(checkpoint)
    return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)
def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
    """
    Parses either flavor of the DECODE function:

    - DECODE(bin, charset), kept as an `exp.Decode` node (fewer than three arguments)
    - DECODE(expression, search, result [, search, result] ... [, default]), which is
      always rewritten into an `exp.Case`

    In the second flavor a NULL search value is compared with `IS NULL`, and a search
    value that is neither a literal nor NULL matches on equality or when both sides
    are NULL.

    Returns None if the leading expression, or any search / result argument, is missing.
    """
    args = self._parse_csv(self._parse_assignment)

    if len(args) < 3:
        return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

    subject, *rest = args
    if not subject:
        return None

    branches = []
    for search, result in zip(rest[::2], rest[1::2]):
        if not search or not result:
            return None

        if isinstance(search, exp.Literal):
            condition = exp.EQ(this=subject.copy(), expression=search)
        elif isinstance(search, exp.Null):
            condition = exp.Is(this=subject.copy(), expression=exp.Null())
        else:
            equal = exp.EQ(this=subject.copy(), expression=search)
            both_null = exp.and_(
                exp.Is(this=subject.copy(), expression=exp.Null()),
                exp.Is(this=search.copy(), expression=exp.Null()),
                copy=False,
            )
            condition = exp.or_(equal, both_null, copy=False)

        branches.append(exp.If(this=condition, true=result))

    # An odd number of trailing arguments means that the last one is the default
    has_default = len(rest) % 2 == 1
    return exp.Case(ifs=branches, default=rest[-1] if has_default else None)
def _parse_on_handling(
    self, on: str, *values: str
) -> t.Optional[str] | t.Optional[exp.Expression]:
    """Parse the `X ON Y` or `DEFAULT <expr> ON Y` syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL).

    Args:
        on: The `Y` keyword that must follow ON.
        *values: The candidate `X` keywords, tried in the given order.

    Returns:
        The string `"<value> ON <on>"` for the first keyword form that matches,
        the parsed default expression for the DEFAULT form, or None (with the
        token position restored) when neither form is present.
    """
    keyword = next((value for value in values if self._match_text_seq(value, "ON", on)), None)
    if keyword is not None:
        return f"{keyword} ON {on}"

    checkpoint = self._index
    if self._match(TokenType.DEFAULT):
        fallback = self._parse_bitwise()
        if self._match_text_seq("ON", on):
            return fallback

    # Either no DEFAULT, or a DEFAULT that is not followed by ON <on>: undo it
    self._retreat(checkpoint)
    return None
def _parse_json_column_def(self) -> exp.JSONColumnDef:
    """Parse one column definition of a JSON schema.

    Note: this is currently incomplete; it only implements the "JSON_value_column" part.

    A definition is either `NESTED [PATH <string>] <schema>` or
    `<name> <type> [PATH <string>]`.

    Returns:
        An `exp.JSONColumnDef`. For NESTED definitions `this` and `kind` are
        None and `nested_schema` holds the parsed schema.
    """
    is_nested = self._match_text_seq("NESTED")

    if is_nested:
        name = None
        kind = None
    else:
        name = self._parse_id_var()
        kind = self._parse_types(allow_identifiers=False)

    path = self._match_text_seq("PATH") and self._parse_string()
    nested_schema = self._parse_json_schema() if is_nested else None

    return self.expression(
        exp.JSONColumnDef,
        this=name,
        kind=kind,
        path=path,
        nested_schema=nested_schema,
    )
def _parse_match_against(self) -> exp.MatchAgainst:
    """Parse `MATCH (<columns>) AGAINST (<string> [<modifier>])`, after `MATCH (`.

    Recognized modifiers are IN NATURAL LANGUAGE MODE (optionally followed by
    WITH QUERY EXPANSION), IN BOOLEAN MODE and WITH QUERY EXPANSION.

    Returns:
        An `exp.MatchAgainst` holding the search string, the matched columns
        and the modifier text (None when no modifier is present).
    """
    columns = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    search = self._parse_string()

    modifier: t.Optional[str] = None
    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        expanded = self._match_text_seq("WITH", "QUERY", "EXPANSION")
        modifier = (
            "IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION"
            if expanded
            else "IN NATURAL LANGUAGE MODE"
        )
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        modifier = "WITH QUERY EXPANSION"

    return self.expression(
        exp.MatchAgainst, this=search, expressions=columns, modifier=modifier
    )
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """Parse the arguments of a POSITION-like function.

    Both `<needle> IN <haystack>` and the comma-separated argument list are
    supported.

    Args:
        haystack_first: For the comma-separated form, whether the string being
            searched is the first argument rather than the second.

    Returns:
        An `exp.StrPosition`. In the comma-separated form a third argument, if
        any, is stored as `position`.
    """
    args = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        haystack = self._parse_bitwise()
        return self.expression(exp.StrPosition, this=haystack, substr=seq_get(args, 0))

    haystack_idx, needle_idx = (0, 1) if haystack_first else (1, 0)

    return self.expression(
        exp.StrPosition,
        this=seq_get(args, haystack_idx),
        substr=seq_get(args, needle_idx),
        position=seq_get(args, 2),
    )
def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap `this` in a `HavingMax` node when a `HAVING {MAX | MIN} <column>` suffix follows.

    Args:
        this: The expression parsed so far.

    Returns:
        An `exp.HavingMax` around `this` when HAVING is matched, otherwise
        `this` unchanged. `max` is False only when the keyword after HAVING is
        MIN.
    """
    if not self._match(TokenType.HAVING):
        return this

    self._match_texts(("MAX", "MIN"))
    is_max = self._prev.text.upper() != "MIN"
    operand = self._parse_column()

    return self.expression(exp.HavingMax, this=this, expression=operand, max=is_max)
6845 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6846 if self._match_text_seq("WITHIN", "GROUP"): 6847 order = self._parse_wrapped(self._parse_order) 6848 this = self.expression(exp.WithinGroup, this=this, expression=order) 6849 6850 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6851 self._match(TokenType.WHERE) 6852 this = self.expression( 6853 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6854 ) 6855 self._match_r_paren() 6856 6857 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6858 # Some dialects choose to implement and some do not. 6859 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6860 6861 # There is some code above in _parse_lambda that handles 6862 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6863 6864 # The below changes handle 6865 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6866 6867 # Oracle allows both formats 6868 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6869 # and Snowflake chose to do the same for familiarity 6870 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6871 if isinstance(this, exp.AggFunc): 6872 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6873 6874 if ignore_respect and ignore_respect is not this: 6875 ignore_respect.replace(ignore_respect.this) 6876 this = self.expression(ignore_respect.__class__, this=this) 6877 6878 this = self._parse_respect_or_ignore_nulls(this) 6879 6880 # bigquery select from window x AS (partition by ...) 
6881 if alias: 6882 over = None 6883 self._match(TokenType.ALIAS) 6884 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6885 return this 6886 else: 6887 over = self._prev.text.upper() 6888 6889 if comments and isinstance(func, exp.Expression): 6890 func.pop_comments() 6891 6892 if not self._match(TokenType.L_PAREN): 6893 return self.expression( 6894 exp.Window, 6895 comments=comments, 6896 this=this, 6897 alias=self._parse_id_var(False), 6898 over=over, 6899 ) 6900 6901 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6902 6903 first = self._match(TokenType.FIRST) 6904 if self._match_text_seq("LAST"): 6905 first = False 6906 6907 partition, order = self._parse_partition_and_order() 6908 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6909 6910 if kind: 6911 self._match(TokenType.BETWEEN) 6912 start = self._parse_window_spec() 6913 self._match(TokenType.AND) 6914 end = self._parse_window_spec() 6915 exclude = ( 6916 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6917 if self._match_text_seq("EXCLUDE") 6918 else None 6919 ) 6920 6921 spec = self.expression( 6922 exp.WindowSpec, 6923 kind=kind, 6924 start=start["value"], 6925 start_side=start["side"], 6926 end=end["value"], 6927 end_side=end["side"], 6928 exclude=exclude, 6929 ) 6930 else: 6931 spec = None 6932 6933 self._match_r_paren() 6934 6935 window = self.expression( 6936 exp.Window, 6937 comments=comments, 6938 this=this, 6939 partition_by=partition, 6940 order=order, 6941 spec=spec, 6942 alias=window_alias, 6943 over=over, 6944 first=first, 6945 ) 6946 6947 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
6948 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6949 return self._parse_window(window, alias=alias) 6950 6951 return window 6952 6953 def _parse_partition_and_order( 6954 self, 6955 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6956 return self._parse_partition_by(), self._parse_order() 6957 6958 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6959 self._match(TokenType.BETWEEN) 6960 6961 return { 6962 "value": ( 6963 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6964 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6965 or self._parse_bitwise() 6966 ), 6967 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6968 } 6969 6970 def _parse_alias( 6971 self, this: t.Optional[exp.Expression], explicit: bool = False 6972 ) -> t.Optional[exp.Expression]: 6973 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6974 # so this section tries to parse the clause version and if it fails, it treats the token 6975 # as an identifier (alias) 6976 if self._can_parse_limit_or_offset(): 6977 return this 6978 6979 any_token = self._match(TokenType.ALIAS) 6980 comments = self._prev_comments or [] 6981 6982 if explicit and not any_token: 6983 return this 6984 6985 if self._match(TokenType.L_PAREN): 6986 aliases = self.expression( 6987 exp.Aliases, 6988 comments=comments, 6989 this=this, 6990 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6991 ) 6992 self._match_r_paren(aliases) 6993 return aliases 6994 6995 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6996 self.STRING_ALIASES and self._parse_string_as_identifier() 6997 ) 6998 6999 if alias: 7000 comments.extend(alias.pop_comments()) 7001 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7002 column = this.this 7003 7004 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7005 if not this.comments and 
column and column.comments: 7006 this.comments = column.pop_comments() 7007 7008 return this 7009 7010 def _parse_id_var( 7011 self, 7012 any_token: bool = True, 7013 tokens: t.Optional[t.Collection[TokenType]] = None, 7014 ) -> t.Optional[exp.Expression]: 7015 expression = self._parse_identifier() 7016 if not expression and ( 7017 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7018 ): 7019 quoted = self._prev.token_type == TokenType.STRING 7020 expression = self._identifier_expression(quoted=quoted) 7021 7022 return expression 7023 7024 def _parse_string(self) -> t.Optional[exp.Expression]: 7025 if self._match_set(self.STRING_PARSERS): 7026 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7027 return self._parse_placeholder() 7028 7029 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7030 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7031 if output: 7032 output.update_positions(self._prev) 7033 return output 7034 7035 def _parse_number(self) -> t.Optional[exp.Expression]: 7036 if self._match_set(self.NUMERIC_PARSERS): 7037 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7038 return self._parse_placeholder() 7039 7040 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7041 if self._match(TokenType.IDENTIFIER): 7042 return self._identifier_expression(quoted=True) 7043 return self._parse_placeholder() 7044 7045 def _parse_var( 7046 self, 7047 any_token: bool = False, 7048 tokens: t.Optional[t.Collection[TokenType]] = None, 7049 upper: bool = False, 7050 ) -> t.Optional[exp.Expression]: 7051 if ( 7052 (any_token and self._advance_any()) 7053 or self._match(TokenType.VAR) 7054 or (self._match_set(tokens) if tokens else False) 7055 ): 7056 return self.expression( 7057 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7058 ) 7059 return self._parse_placeholder() 7060 7061 def _advance_any(self, 
ignore_reserved: bool = False) -> t.Optional[Token]: 7062 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7063 self._advance() 7064 return self._prev 7065 return None 7066 7067 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7068 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7069 7070 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7071 return self._parse_primary() or self._parse_var(any_token=True) 7072 7073 def _parse_null(self) -> t.Optional[exp.Expression]: 7074 if self._match_set(self.NULL_TOKENS): 7075 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7076 return self._parse_placeholder() 7077 7078 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7079 if self._match(TokenType.TRUE): 7080 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7081 if self._match(TokenType.FALSE): 7082 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7083 return self._parse_placeholder() 7084 7085 def _parse_star(self) -> t.Optional[exp.Expression]: 7086 if self._match(TokenType.STAR): 7087 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7088 return self._parse_placeholder() 7089 7090 def _parse_parameter(self) -> exp.Parameter: 7091 this = self._parse_identifier() or self._parse_primary_or_var() 7092 return self.expression(exp.Parameter, this=this) 7093 7094 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7095 if self._match_set(self.PLACEHOLDER_PARSERS): 7096 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7097 if placeholder: 7098 return placeholder 7099 self._advance(-1) 7100 return None 7101 7102 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7103 if not self._match_texts(keywords): 7104 return None 7105 if self._match(TokenType.L_PAREN, advance=False): 7106 return self._parse_wrapped_csv(self._parse_expression) 7107 7108 expression = 
self._parse_expression() 7109 return [expression] if expression else None 7110 7111 def _parse_csv( 7112 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7113 ) -> t.List[exp.Expression]: 7114 parse_result = parse_method() 7115 items = [parse_result] if parse_result is not None else [] 7116 7117 while self._match(sep): 7118 self._add_comments(parse_result) 7119 parse_result = parse_method() 7120 if parse_result is not None: 7121 items.append(parse_result) 7122 7123 return items 7124 7125 def _parse_tokens( 7126 self, parse_method: t.Callable, expressions: t.Dict 7127 ) -> t.Optional[exp.Expression]: 7128 this = parse_method() 7129 7130 while self._match_set(expressions): 7131 this = self.expression( 7132 expressions[self._prev.token_type], 7133 this=this, 7134 comments=self._prev_comments, 7135 expression=parse_method(), 7136 ) 7137 7138 return this 7139 7140 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7141 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7142 7143 def _parse_wrapped_csv( 7144 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7145 ) -> t.List[exp.Expression]: 7146 return self._parse_wrapped( 7147 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7148 ) 7149 7150 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7151 wrapped = self._match(TokenType.L_PAREN) 7152 if not wrapped and not optional: 7153 self.raise_error("Expecting (") 7154 parse_result = parse_method() 7155 if wrapped: 7156 self._match_r_paren() 7157 return parse_result 7158 7159 def _parse_expressions(self) -> t.List[exp.Expression]: 7160 return self._parse_csv(self._parse_expression) 7161 7162 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7163 return self._parse_select() or self._parse_set_operations( 7164 self._parse_alias(self._parse_assignment(), explicit=True) 7165 if alias 
7166 else self._parse_assignment() 7167 ) 7168 7169 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7170 return self._parse_query_modifiers( 7171 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7172 ) 7173 7174 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7175 this = None 7176 if self._match_texts(self.TRANSACTION_KIND): 7177 this = self._prev.text 7178 7179 self._match_texts(("TRANSACTION", "WORK")) 7180 7181 modes = [] 7182 while True: 7183 mode = [] 7184 while self._match(TokenType.VAR): 7185 mode.append(self._prev.text) 7186 7187 if mode: 7188 modes.append(" ".join(mode)) 7189 if not self._match(TokenType.COMMA): 7190 break 7191 7192 return self.expression(exp.Transaction, this=this, modes=modes) 7193 7194 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7195 chain = None 7196 savepoint = None 7197 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7198 7199 self._match_texts(("TRANSACTION", "WORK")) 7200 7201 if self._match_text_seq("TO"): 7202 self._match_text_seq("SAVEPOINT") 7203 savepoint = self._parse_id_var() 7204 7205 if self._match(TokenType.AND): 7206 chain = not self._match_text_seq("NO") 7207 self._match_text_seq("CHAIN") 7208 7209 if is_rollback: 7210 return self.expression(exp.Rollback, savepoint=savepoint) 7211 7212 return self.expression(exp.Commit, chain=chain) 7213 7214 def _parse_refresh(self) -> exp.Refresh: 7215 self._match(TokenType.TABLE) 7216 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7217 7218 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7219 if not self._match_text_seq("ADD"): 7220 return None 7221 7222 self._match(TokenType.COLUMN) 7223 exists_column = self._parse_exists(not_=True) 7224 expression = self._parse_field_def() 7225 7226 if expression: 7227 expression.set("exists", exists_column) 7228 7229 # 
https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7230 if self._match_texts(("FIRST", "AFTER")): 7231 position = self._prev.text 7232 column_position = self.expression( 7233 exp.ColumnPosition, this=self._parse_column(), position=position 7234 ) 7235 expression.set("position", column_position) 7236 7237 return expression 7238 7239 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7240 drop = self._match(TokenType.DROP) and self._parse_drop() 7241 if drop and not isinstance(drop, exp.Command): 7242 drop.set("kind", drop.args.get("kind", "COLUMN")) 7243 return drop 7244 7245 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7246 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7247 return self.expression( 7248 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7249 ) 7250 7251 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7252 index = self._index - 1 7253 7254 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7255 return self._parse_csv( 7256 lambda: self.expression( 7257 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7258 ) 7259 ) 7260 7261 self._retreat(index) 7262 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7263 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7264 7265 if self._match_text_seq("ADD", "COLUMNS"): 7266 schema = self._parse_schema() 7267 if schema: 7268 return [schema] 7269 return [] 7270 7271 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7272 7273 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7274 if self._match_texts(self.ALTER_ALTER_PARSERS): 7275 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7276 7277 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7278 # keyword after ALTER we default to parsing 
this statement 7279 self._match(TokenType.COLUMN) 7280 column = self._parse_field(any_token=True) 7281 7282 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7283 return self.expression(exp.AlterColumn, this=column, drop=True) 7284 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7285 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7286 if self._match(TokenType.COMMENT): 7287 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7288 if self._match_text_seq("DROP", "NOT", "NULL"): 7289 return self.expression( 7290 exp.AlterColumn, 7291 this=column, 7292 drop=True, 7293 allow_null=True, 7294 ) 7295 if self._match_text_seq("SET", "NOT", "NULL"): 7296 return self.expression( 7297 exp.AlterColumn, 7298 this=column, 7299 allow_null=False, 7300 ) 7301 7302 if self._match_text_seq("SET", "VISIBLE"): 7303 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7304 if self._match_text_seq("SET", "INVISIBLE"): 7305 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7306 7307 self._match_text_seq("SET", "DATA") 7308 self._match_text_seq("TYPE") 7309 return self.expression( 7310 exp.AlterColumn, 7311 this=column, 7312 dtype=self._parse_types(), 7313 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7314 using=self._match(TokenType.USING) and self._parse_assignment(), 7315 ) 7316 7317 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7318 if self._match_texts(("ALL", "EVEN", "AUTO")): 7319 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7320 7321 self._match_text_seq("KEY", "DISTKEY") 7322 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7323 7324 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7325 if compound: 7326 self._match_text_seq("SORTKEY") 7327 7328 if self._match(TokenType.L_PAREN, advance=False): 7329 return self.expression( 7330 
exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7331 ) 7332 7333 self._match_texts(("AUTO", "NONE")) 7334 return self.expression( 7335 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7336 ) 7337 7338 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7339 index = self._index - 1 7340 7341 partition_exists = self._parse_exists() 7342 if self._match(TokenType.PARTITION, advance=False): 7343 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7344 7345 self._retreat(index) 7346 return self._parse_csv(self._parse_drop_column) 7347 7348 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7349 if self._match(TokenType.COLUMN): 7350 exists = self._parse_exists() 7351 old_column = self._parse_column() 7352 to = self._match_text_seq("TO") 7353 new_column = self._parse_column() 7354 7355 if old_column is None or to is None or new_column is None: 7356 return None 7357 7358 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7359 7360 self._match_text_seq("TO") 7361 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7362 7363 def _parse_alter_table_set(self) -> exp.AlterSet: 7364 alter_set = self.expression(exp.AlterSet) 7365 7366 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7367 "TABLE", "PROPERTIES" 7368 ): 7369 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7370 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7371 alter_set.set("expressions", [self._parse_assignment()]) 7372 elif self._match_texts(("LOGGED", "UNLOGGED")): 7373 alter_set.set("option", exp.var(self._prev.text.upper())) 7374 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7375 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7376 elif self._match_text_seq("LOCATION"): 7377 
alter_set.set("location", self._parse_field()) 7378 elif self._match_text_seq("ACCESS", "METHOD"): 7379 alter_set.set("access_method", self._parse_field()) 7380 elif self._match_text_seq("TABLESPACE"): 7381 alter_set.set("tablespace", self._parse_field()) 7382 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7383 alter_set.set("file_format", [self._parse_field()]) 7384 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7385 alter_set.set("file_format", self._parse_wrapped_options()) 7386 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7387 alter_set.set("copy_options", self._parse_wrapped_options()) 7388 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7389 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7390 else: 7391 if self._match_text_seq("SERDE"): 7392 alter_set.set("serde", self._parse_field()) 7393 7394 alter_set.set("expressions", [self._parse_properties()]) 7395 7396 return alter_set 7397 7398 def _parse_alter(self) -> exp.Alter | exp.Command: 7399 start = self._prev 7400 7401 alter_token = self._match_set(self.ALTERABLES) and self._prev 7402 if not alter_token: 7403 return self._parse_as_command(start) 7404 7405 exists = self._parse_exists() 7406 only = self._match_text_seq("ONLY") 7407 this = self._parse_table(schema=True) 7408 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7409 7410 if self._next: 7411 self._advance() 7412 7413 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7414 if parser: 7415 actions = ensure_list(parser(self)) 7416 not_valid = self._match_text_seq("NOT", "VALID") 7417 options = self._parse_csv(self._parse_property) 7418 7419 if not self._curr and actions: 7420 return self.expression( 7421 exp.Alter, 7422 this=this, 7423 kind=alter_token.text.upper(), 7424 exists=exists, 7425 actions=actions, 7426 only=only, 7427 options=options, 7428 cluster=cluster, 7429 not_valid=not_valid, 7430 ) 7431 7432 return 
self._parse_as_command(start) 7433 7434 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7435 start = self._prev 7436 # https://duckdb.org/docs/sql/statements/analyze 7437 if not self._curr: 7438 return self.expression(exp.Analyze) 7439 7440 options = [] 7441 while self._match_texts(self.ANALYZE_STYLES): 7442 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7443 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7444 else: 7445 options.append(self._prev.text.upper()) 7446 7447 this: t.Optional[exp.Expression] = None 7448 inner_expression: t.Optional[exp.Expression] = None 7449 7450 kind = self._curr and self._curr.text.upper() 7451 7452 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7453 this = self._parse_table_parts() 7454 elif self._match_text_seq("TABLES"): 7455 if self._match_set((TokenType.FROM, TokenType.IN)): 7456 kind = f"{kind} {self._prev.text.upper()}" 7457 this = self._parse_table(schema=True, is_db_reference=True) 7458 elif self._match_text_seq("DATABASE"): 7459 this = self._parse_table(schema=True, is_db_reference=True) 7460 elif self._match_text_seq("CLUSTER"): 7461 this = self._parse_table() 7462 # Try matching inner expr keywords before fallback to parse table. 
7463 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7464 kind = None 7465 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7466 else: 7467 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7468 kind = None 7469 this = self._parse_table_parts() 7470 7471 partition = self._try_parse(self._parse_partition) 7472 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7473 return self._parse_as_command(start) 7474 7475 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7476 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7477 "WITH", "ASYNC", "MODE" 7478 ): 7479 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7480 else: 7481 mode = None 7482 7483 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7484 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7485 7486 properties = self._parse_properties() 7487 return self.expression( 7488 exp.Analyze, 7489 kind=kind, 7490 this=this, 7491 mode=mode, 7492 partition=partition, 7493 properties=properties, 7494 expression=inner_expression, 7495 options=options, 7496 ) 7497 7498 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7499 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7500 this = None 7501 kind = self._prev.text.upper() 7502 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7503 expressions = [] 7504 7505 if not self._match_text_seq("STATISTICS"): 7506 self.raise_error("Expecting token STATISTICS") 7507 7508 if self._match_text_seq("NOSCAN"): 7509 this = "NOSCAN" 7510 elif self._match(TokenType.FOR): 7511 if self._match_text_seq("ALL", "COLUMNS"): 7512 this = "FOR ALL COLUMNS" 7513 if self._match_texts("COLUMNS"): 7514 this = "FOR COLUMNS" 7515 expressions = self._parse_csv(self._parse_column_reference) 7516 elif self._match_text_seq("SAMPLE"): 7517 sample = self._parse_number() 
7518 expressions = [ 7519 self.expression( 7520 exp.AnalyzeSample, 7521 sample=sample, 7522 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7523 ) 7524 ] 7525 7526 return self.expression( 7527 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7528 ) 7529 7530 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7531 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7532 kind = None 7533 this = None 7534 expression: t.Optional[exp.Expression] = None 7535 if self._match_text_seq("REF", "UPDATE"): 7536 kind = "REF" 7537 this = "UPDATE" 7538 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7539 this = "UPDATE SET DANGLING TO NULL" 7540 elif self._match_text_seq("STRUCTURE"): 7541 kind = "STRUCTURE" 7542 if self._match_text_seq("CASCADE", "FAST"): 7543 this = "CASCADE FAST" 7544 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7545 ("ONLINE", "OFFLINE") 7546 ): 7547 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7548 expression = self._parse_into() 7549 7550 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7551 7552 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7553 this = self._prev.text.upper() 7554 if self._match_text_seq("COLUMNS"): 7555 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7556 return None 7557 7558 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7559 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7560 if self._match_text_seq("STATISTICS"): 7561 return self.expression(exp.AnalyzeDelete, kind=kind) 7562 return None 7563 7564 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7565 if self._match_text_seq("CHAINED", "ROWS"): 7566 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7567 return None 7568 7569 # 
https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7570 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7571 this = self._prev.text.upper() 7572 expression: t.Optional[exp.Expression] = None 7573 expressions = [] 7574 update_options = None 7575 7576 if self._match_text_seq("HISTOGRAM", "ON"): 7577 expressions = self._parse_csv(self._parse_column_reference) 7578 with_expressions = [] 7579 while self._match(TokenType.WITH): 7580 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7581 if self._match_texts(("SYNC", "ASYNC")): 7582 if self._match_text_seq("MODE", advance=False): 7583 with_expressions.append(f"{self._prev.text.upper()} MODE") 7584 self._advance() 7585 else: 7586 buckets = self._parse_number() 7587 if self._match_text_seq("BUCKETS"): 7588 with_expressions.append(f"{buckets} BUCKETS") 7589 if with_expressions: 7590 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7591 7592 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7593 TokenType.UPDATE, advance=False 7594 ): 7595 update_options = self._prev.text.upper() 7596 self._advance() 7597 elif self._match_text_seq("USING", "DATA"): 7598 expression = self.expression(exp.UsingData, this=self._parse_string()) 7599 7600 return self.expression( 7601 exp.AnalyzeHistogram, 7602 this=this, 7603 expressions=expressions, 7604 expression=expression, 7605 update_options=update_options, 7606 ) 7607 7608 def _parse_merge(self) -> exp.Merge: 7609 self._match(TokenType.INTO) 7610 target = self._parse_table() 7611 7612 if target and self._match(TokenType.ALIAS, advance=False): 7613 target.set("alias", self._parse_table_alias()) 7614 7615 self._match(TokenType.USING) 7616 using = self._parse_table() 7617 7618 self._match(TokenType.ON) 7619 on = self._parse_assignment() 7620 7621 return self.expression( 7622 exp.Merge, 7623 this=target, 7624 using=using, 7625 on=on, 7626 whens=self._parse_when_matched(), 7627 
returning=self._parse_returning(), 7628 ) 7629 7630 def _parse_when_matched(self) -> exp.Whens: 7631 whens = [] 7632 7633 while self._match(TokenType.WHEN): 7634 matched = not self._match(TokenType.NOT) 7635 self._match_text_seq("MATCHED") 7636 source = ( 7637 False 7638 if self._match_text_seq("BY", "TARGET") 7639 else self._match_text_seq("BY", "SOURCE") 7640 ) 7641 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7642 7643 self._match(TokenType.THEN) 7644 7645 if self._match(TokenType.INSERT): 7646 this = self._parse_star() 7647 if this: 7648 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7649 else: 7650 then = self.expression( 7651 exp.Insert, 7652 this=exp.var("ROW") 7653 if self._match_text_seq("ROW") 7654 else self._parse_value(values=False), 7655 expression=self._match_text_seq("VALUES") and self._parse_value(), 7656 ) 7657 elif self._match(TokenType.UPDATE): 7658 expressions = self._parse_star() 7659 if expressions: 7660 then = self.expression(exp.Update, expressions=expressions) 7661 else: 7662 then = self.expression( 7663 exp.Update, 7664 expressions=self._match(TokenType.SET) 7665 and self._parse_csv(self._parse_equality), 7666 ) 7667 elif self._match(TokenType.DELETE): 7668 then = self.expression(exp.Var, this=self._prev.text) 7669 else: 7670 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7671 7672 whens.append( 7673 self.expression( 7674 exp.When, 7675 matched=matched, 7676 source=source, 7677 condition=condition, 7678 then=then, 7679 ) 7680 ) 7681 return self.expression(exp.Whens, expressions=whens) 7682 7683 def _parse_show(self) -> t.Optional[exp.Expression]: 7684 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7685 if parser: 7686 return parser(self) 7687 return self._parse_as_command(self._prev) 7688 7689 def _parse_set_item_assignment( 7690 self, kind: t.Optional[str] = None 7691 ) -> t.Optional[exp.Expression]: 7692 index = self._index 7693 7694 if kind in 
("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7695 return self._parse_set_transaction(global_=kind == "GLOBAL") 7696 7697 left = self._parse_primary() or self._parse_column() 7698 assignment_delimiter = self._match_texts(("=", "TO")) 7699 7700 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7701 self._retreat(index) 7702 return None 7703 7704 right = self._parse_statement() or self._parse_id_var() 7705 if isinstance(right, (exp.Column, exp.Identifier)): 7706 right = exp.var(right.name) 7707 7708 this = self.expression(exp.EQ, this=left, expression=right) 7709 return self.expression(exp.SetItem, this=this, kind=kind) 7710 7711 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7712 self._match_text_seq("TRANSACTION") 7713 characteristics = self._parse_csv( 7714 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7715 ) 7716 return self.expression( 7717 exp.SetItem, 7718 expressions=characteristics, 7719 kind="TRANSACTION", 7720 **{"global": global_}, # type: ignore 7721 ) 7722 7723 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7724 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7725 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7726 7727 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7728 index = self._index 7729 set_ = self.expression( 7730 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7731 ) 7732 7733 if self._curr: 7734 self._retreat(index) 7735 return self._parse_as_command(self._prev) 7736 7737 return set_ 7738 7739 def _parse_var_from_options( 7740 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7741 ) -> t.Optional[exp.Var]: 7742 start = self._curr 7743 if not start: 7744 return None 7745 7746 option = start.text.upper() 7747 continuations = options.get(option) 7748 7749 index = self._index 7750 self._advance() 7751 for 
keywords in continuations or []: 7752 if isinstance(keywords, str): 7753 keywords = (keywords,) 7754 7755 if self._match_text_seq(*keywords): 7756 option = f"{option} {' '.join(keywords)}" 7757 break 7758 else: 7759 if continuations or continuations is None: 7760 if raise_unmatched: 7761 self.raise_error(f"Unknown option {option}") 7762 7763 self._retreat(index) 7764 return None 7765 7766 return exp.var(option) 7767 7768 def _parse_as_command(self, start: Token) -> exp.Command: 7769 while self._curr: 7770 self._advance() 7771 text = self._find_sql(start, self._prev) 7772 size = len(start.text) 7773 self._warn_unsupported() 7774 return exp.Command(this=text[:size], expression=text[size:]) 7775 7776 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7777 settings = [] 7778 7779 self._match_l_paren() 7780 kind = self._parse_id_var() 7781 7782 if self._match(TokenType.L_PAREN): 7783 while True: 7784 key = self._parse_id_var() 7785 value = self._parse_primary() 7786 if not key and value is None: 7787 break 7788 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7789 self._match(TokenType.R_PAREN) 7790 7791 self._match_r_paren() 7792 7793 return self.expression( 7794 exp.DictProperty, 7795 this=this, 7796 kind=kind.this if kind else None, 7797 settings=settings, 7798 ) 7799 7800 def _parse_dict_range(self, this: str) -> exp.DictRange: 7801 self._match_l_paren() 7802 has_min = self._match_text_seq("MIN") 7803 if has_min: 7804 min = self._parse_var() or self._parse_primary() 7805 self._match_text_seq("MAX") 7806 max = self._parse_var() or self._parse_primary() 7807 else: 7808 max = self._parse_var() or self._parse_primary() 7809 min = exp.Literal.number(0) 7810 self._match_r_paren() 7811 return self.expression(exp.DictRange, this=this, min=min, max=max) 7812 7813 def _parse_comprehension( 7814 self, this: t.Optional[exp.Expression] 7815 ) -> t.Optional[exp.Comprehension]: 7816 index = self._index 7817 expression = self._parse_column() 
7818 if not self._match(TokenType.IN): 7819 self._retreat(index - 1) 7820 return None 7821 iterator = self._parse_column() 7822 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7823 return self.expression( 7824 exp.Comprehension, 7825 this=this, 7826 expression=expression, 7827 iterator=iterator, 7828 condition=condition, 7829 ) 7830 7831 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7832 if self._match(TokenType.HEREDOC_STRING): 7833 return self.expression(exp.Heredoc, this=self._prev.text) 7834 7835 if not self._match_text_seq("$"): 7836 return None 7837 7838 tags = ["$"] 7839 tag_text = None 7840 7841 if self._is_connected(): 7842 self._advance() 7843 tags.append(self._prev.text.upper()) 7844 else: 7845 self.raise_error("No closing $ found") 7846 7847 if tags[-1] != "$": 7848 if self._is_connected() and self._match_text_seq("$"): 7849 tag_text = tags[-1] 7850 tags.append("$") 7851 else: 7852 self.raise_error("No closing $ found") 7853 7854 heredoc_start = self._curr 7855 7856 while self._curr: 7857 if self._match_text_seq(*tags, advance=False): 7858 this = self._find_sql(heredoc_start, self._prev) 7859 self._advance(len(tags)) 7860 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7861 7862 self._advance() 7863 7864 self.raise_error(f"No closing {''.join(tags)} found") 7865 return None 7866 7867 def _find_parser( 7868 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7869 ) -> t.Optional[t.Callable]: 7870 if not self._curr: 7871 return None 7872 7873 index = self._index 7874 this = [] 7875 while True: 7876 # The current token might be multiple words 7877 curr = self._curr.text.upper() 7878 key = curr.split(" ") 7879 this.append(curr) 7880 7881 self._advance() 7882 result, trie = in_trie(trie, key) 7883 if result == TrieResult.FAILED: 7884 break 7885 7886 if result == TrieResult.EXISTS: 7887 subparser = parsers[" ".join(this)] 7888 return subparser 7889 7890 self._retreat(index) 7891 return None 7892 7893 def 
_match(self, token_type, advance=True, expression=None): 7894 if not self._curr: 7895 return None 7896 7897 if self._curr.token_type == token_type: 7898 if advance: 7899 self._advance() 7900 self._add_comments(expression) 7901 return True 7902 7903 return None 7904 7905 def _match_set(self, types, advance=True): 7906 if not self._curr: 7907 return None 7908 7909 if self._curr.token_type in types: 7910 if advance: 7911 self._advance() 7912 return True 7913 7914 return None 7915 7916 def _match_pair(self, token_type_a, token_type_b, advance=True): 7917 if not self._curr or not self._next: 7918 return None 7919 7920 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7921 if advance: 7922 self._advance(2) 7923 return True 7924 7925 return None 7926 7927 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7928 if not self._match(TokenType.L_PAREN, expression=expression): 7929 self.raise_error("Expecting (") 7930 7931 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7932 if not self._match(TokenType.R_PAREN, expression=expression): 7933 self.raise_error("Expecting )") 7934 7935 def _match_texts(self, texts, advance=True): 7936 if ( 7937 self._curr 7938 and self._curr.token_type != TokenType.STRING 7939 and self._curr.text.upper() in texts 7940 ): 7941 if advance: 7942 self._advance() 7943 return True 7944 return None 7945 7946 def _match_text_seq(self, *texts, advance=True): 7947 index = self._index 7948 for text in texts: 7949 if ( 7950 self._curr 7951 and self._curr.token_type != TokenType.STRING 7952 and self._curr.text.upper() == text 7953 ): 7954 self._advance() 7955 else: 7956 self._retreat(index) 7957 return None 7958 7959 if not advance: 7960 self._retreat(index) 7961 7962 return True 7963 7964 def _replace_lambda( 7965 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7966 ) -> t.Optional[exp.Expression]: 7967 if not node: 7968 return node 7969 
7970 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7971 7972 for column in node.find_all(exp.Column): 7973 typ = lambda_types.get(column.parts[0].name) 7974 if typ is not None: 7975 dot_or_id = column.to_dot() if column.table else column.this 7976 7977 if typ: 7978 dot_or_id = self.expression( 7979 exp.Cast, 7980 this=dot_or_id, 7981 to=typ, 7982 ) 7983 7984 parent = column.parent 7985 7986 while isinstance(parent, exp.Dot): 7987 if not isinstance(parent.parent, exp.Dot): 7988 parent.replace(dot_or_id) 7989 break 7990 parent = parent.parent 7991 else: 7992 if column is node: 7993 node = dot_or_id 7994 else: 7995 column.replace(dot_or_id) 7996 return node 7997 7998 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7999 start = self._prev 8000 8001 # Not to be confused with TRUNCATE(number, decimals) function call 8002 if self._match(TokenType.L_PAREN): 8003 self._retreat(self._index - 2) 8004 return self._parse_function() 8005 8006 # Clickhouse supports TRUNCATE DATABASE as well 8007 is_database = self._match(TokenType.DATABASE) 8008 8009 self._match(TokenType.TABLE) 8010 8011 exists = self._parse_exists(not_=False) 8012 8013 expressions = self._parse_csv( 8014 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8015 ) 8016 8017 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8018 8019 if self._match_text_seq("RESTART", "IDENTITY"): 8020 identity = "RESTART" 8021 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8022 identity = "CONTINUE" 8023 else: 8024 identity = None 8025 8026 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8027 option = self._prev.text 8028 else: 8029 option = None 8030 8031 partition = self._parse_partition() 8032 8033 # Fallback case 8034 if self._curr: 8035 return self._parse_as_command(start) 8036 8037 return self.expression( 8038 exp.TruncateTable, 8039 expressions=expressions, 8040 is_database=is_database, 8041 
exists=exists, 8042 cluster=cluster, 8043 identity=identity, 8044 option=option, 8045 partition=partition, 8046 ) 8047 8048 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8049 this = self._parse_ordered(self._parse_opclass) 8050 8051 if not self._match(TokenType.WITH): 8052 return this 8053 8054 op = self._parse_var(any_token=True) 8055 8056 return self.expression(exp.WithOperator, this=this, op=op) 8057 8058 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8059 self._match(TokenType.EQ) 8060 self._match(TokenType.L_PAREN) 8061 8062 opts: t.List[t.Optional[exp.Expression]] = [] 8063 option: exp.Expression | None 8064 while self._curr and not self._match(TokenType.R_PAREN): 8065 if self._match_text_seq("FORMAT_NAME", "="): 8066 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8067 option = self._parse_format_name() 8068 else: 8069 option = self._parse_property() 8070 8071 if option is None: 8072 self.raise_error("Unable to parse option") 8073 break 8074 8075 opts.append(option) 8076 8077 return opts 8078 8079 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8080 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8081 8082 options = [] 8083 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8084 option = self._parse_var(any_token=True) 8085 prev = self._prev.text.upper() 8086 8087 # Different dialects might separate options and values by white space, "=" and "AS" 8088 self._match(TokenType.EQ) 8089 self._match(TokenType.ALIAS) 8090 8091 param = self.expression(exp.CopyParameter, this=option) 8092 8093 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8094 TokenType.L_PAREN, advance=False 8095 ): 8096 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8097 param.set("expressions", self._parse_wrapped_options()) 8098 elif prev == "FILE_FORMAT": 8099 # T-SQL's external file format case 8100 param.set("expression", self._parse_field()) 8101 
else: 8102 param.set("expression", self._parse_unquoted_field()) 8103 8104 options.append(param) 8105 self._match(sep) 8106 8107 return options 8108 8109 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8110 expr = self.expression(exp.Credentials) 8111 8112 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8113 expr.set("storage", self._parse_field()) 8114 if self._match_text_seq("CREDENTIALS"): 8115 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8116 creds = ( 8117 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8118 ) 8119 expr.set("credentials", creds) 8120 if self._match_text_seq("ENCRYPTION"): 8121 expr.set("encryption", self._parse_wrapped_options()) 8122 if self._match_text_seq("IAM_ROLE"): 8123 expr.set("iam_role", self._parse_field()) 8124 if self._match_text_seq("REGION"): 8125 expr.set("region", self._parse_field()) 8126 8127 return expr 8128 8129 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8130 return self._parse_field() 8131 8132 def _parse_copy(self) -> exp.Copy | exp.Command: 8133 start = self._prev 8134 8135 self._match(TokenType.INTO) 8136 8137 this = ( 8138 self._parse_select(nested=True, parse_subquery_alias=False) 8139 if self._match(TokenType.L_PAREN, advance=False) 8140 else self._parse_table(schema=True) 8141 ) 8142 8143 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8144 8145 files = self._parse_csv(self._parse_file_location) 8146 credentials = self._parse_credentials() 8147 8148 self._match_text_seq("WITH") 8149 8150 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8151 8152 # Fallback case 8153 if self._curr: 8154 return self._parse_as_command(start) 8155 8156 return self.expression( 8157 exp.Copy, 8158 this=this, 8159 kind=kind, 8160 credentials=credentials, 8161 files=files, 8162 params=params, 8163 ) 8164 8165 def _parse_normalize(self) -> exp.Normalize: 8166 return self.expression( 8167 
exp.Normalize, 8168 this=self._parse_bitwise(), 8169 form=self._match(TokenType.COMMA) and self._parse_var(), 8170 ) 8171 8172 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8173 args = self._parse_csv(lambda: self._parse_lambda()) 8174 8175 this = seq_get(args, 0) 8176 decimals = seq_get(args, 1) 8177 8178 return expr_type( 8179 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8180 ) 8181 8182 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8183 if self._match_text_seq("COLUMNS", "(", advance=False): 8184 this = self._parse_function() 8185 if isinstance(this, exp.Columns): 8186 this.set("unpack", True) 8187 return this 8188 8189 return self.expression( 8190 exp.Star, 8191 **{ # type: ignore 8192 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8193 "replace": self._parse_star_op("REPLACE"), 8194 "rename": self._parse_star_op("RENAME"), 8195 }, 8196 ) 8197 8198 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8199 privilege_parts = [] 8200 8201 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8202 # (end of privilege list) or L_PAREN (start of column list) are met 8203 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8204 privilege_parts.append(self._curr.text.upper()) 8205 self._advance() 8206 8207 this = exp.var(" ".join(privilege_parts)) 8208 expressions = ( 8209 self._parse_wrapped_csv(self._parse_column) 8210 if self._match(TokenType.L_PAREN, advance=False) 8211 else None 8212 ) 8213 8214 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8215 8216 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8217 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8218 principal = self._parse_id_var() 8219 8220 if not principal: 8221 return None 8222 8223 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8224 8225 def 
_parse_grant(self) -> exp.Grant | exp.Command: 8226 start = self._prev 8227 8228 privileges = self._parse_csv(self._parse_grant_privilege) 8229 8230 self._match(TokenType.ON) 8231 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8232 8233 # Attempt to parse the securable e.g. MySQL allows names 8234 # such as "foo.*", "*.*" which are not easily parseable yet 8235 securable = self._try_parse(self._parse_table_parts) 8236 8237 if not securable or not self._match_text_seq("TO"): 8238 return self._parse_as_command(start) 8239 8240 principals = self._parse_csv(self._parse_grant_principal) 8241 8242 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8243 8244 if self._curr: 8245 return self._parse_as_command(start) 8246 8247 return self.expression( 8248 exp.Grant, 8249 privileges=privileges, 8250 kind=kind, 8251 securable=securable, 8252 principals=principals, 8253 grant_option=grant_option, 8254 ) 8255 8256 def _parse_overlay(self) -> exp.Overlay: 8257 return self.expression( 8258 exp.Overlay, 8259 **{ # type: ignore 8260 "this": self._parse_bitwise(), 8261 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8262 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8263 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8264 }, 8265 ) 8266 8267 def _parse_format_name(self) -> exp.Property: 8268 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8269 # for FILE_FORMAT = <format_name> 8270 return self.expression( 8271 exp.Property, 8272 this=exp.var("FORMAT_NAME"), 8273 value=self._parse_string() or self._parse_table_parts(), 8274 ) 8275 8276 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8277 args: t.List[exp.Expression] = [] 8278 8279 if self._match(TokenType.DISTINCT): 8280 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8281 self._match(TokenType.COMMA) 8282 8283 
args.extend(self._parse_csv(self._parse_assignment)) 8284 8285 return self.expression( 8286 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8287 ) 8288 8289 def _identifier_expression( 8290 self, token: t.Optional[Token] = None, **kwargs: t.Any 8291 ) -> exp.Identifier: 8292 token = token or self._prev 8293 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8294 expression.update_positions(token) 8295 return expression
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """
    Build a map expression from a flat argument list.

    A lone star argument produces an `exp.StarMap`. Otherwise the arguments are read as
    alternating key, value entries and collected into an `exp.VarMap` holding one array
    of keys and one array of values.

    Args:
        args: Either a single star expression or `key1, value1, key2, value2, ...`.

    Returns:
        An `exp.StarMap` wrapping the star, or an `exp.VarMap` with parallel
        `keys` / `values` arrays.

    Raises:
        IndexError: If the number of arguments is odd, since the last key has no value.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Even positions hold the keys; the slot right after each key holds its value
    pairs = [(args[pos], args[pos + 1]) for pos in range(0, len(args), 2)]
    map_keys = [key for key, _ in pairs]
    map_values = [value for _, value in pairs]

    return exp.VarMap(
        keys=exp.array(*map_keys, copy=False),
        values=exp.array(*map_values, copy=False),
    )
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """
    Create a parser callback for a binary range operator.

    Args:
        expr_type: The expression class to instantiate for the operator.
        reverse_args: If True, the operand parsed by the callback becomes `this` and the
            operand handed to the callback becomes `expression`.

    Returns:
        A function taking the parser and the already-parsed operand. It parses the other
        operand with `_parse_bitwise`, builds `expr_type` from both, and passes the
        result through `_parse_escape`.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        operand = self._parse_bitwise()

        # Decide which operand ends up on which side of the new node
        lhs, rhs = (operand, this) if reverse_args else (this, operand)

        node = self.expression(expr_type, this=lhs, expression=rhs)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """
    Build a logarithm expression, honoring the dialect's argument conventions.

    Args:
        args: The one or two arguments of the LOG call.
        dialect: The dialect whose `LOG_BASE_FIRST` setting and parser-level
            `LOG_DEFAULTS_TO_LN` setting are consulted.

    Returns:
        For two arguments, an `exp.Log` whose operands are swapped when the dialect does
        not put the base first. For a single argument, an `exp.Ln` if the dialect's parser
        sets `LOG_DEFAULTS_TO_LN`, otherwise an `exp.Log`.
    """
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        # Single-argument form: the dialect decides between LN and LOG
        log_type = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return log_type(this=first)

    # Default argument order is base, expression
    if dialect.LOG_BASE_FIRST:
        base, value = first, second
    else:
        base, value = second, first

    return exp.Log(this=base, expression=value)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """
    Create a builder for JSON extraction expressions whose second argument is a path.

    Args:
        expr_type: The expression class the returned builder instantiates.

    Returns:
        A function of `(args, dialect)` that uses the first argument as `this` and the
        second, converted with `dialect.to_json_path`, as `expression`. Any arguments
        after the path are stored under `expressions`, but only when `expr_type` is
        `exp.JSONExtract`.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        target = seq_get(args, 0)
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=target, expression=json_path)

        # Trailing arguments are kept for JSONExtract only
        if len(args) <= 2 or expr_type is not exp.JSONExtract:
            return node

        node.set("expressions", args[2:])
        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """
    Build an `exp.Mod` from the arguments of a MOD call.

    Args:
        args: The call's arguments; the first two are used as the operands.

    Returns:
        An `exp.Mod` whose operands are parenthesized if they are binary nodes,
        e.g. MOD(a + 1, 7) -> (a + 1) % 7.
    """

    def _wrap_binary(operand: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Binary operands need explicit parentheses once MOD becomes an infix operator
        if isinstance(operand, exp.Binary):
            return exp.Paren(this=operand)
        return operand

    left = _wrap_binary(seq_get(args, 0))
    right = _wrap_binary(seq_get(args, 1))

    return exp.Mod(this=left, expression=right)
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """
    Build an array-like expression from constructor arguments.

    Args:
        exp_class: The expression class to instantiate with `args` as its `expressions`.
        args: The elements passed to the constructor.
        bracket_kind: The token type of the bracket that opened the constructor.
        dialect: The dialect whose `HAS_DISTINCT_ARRAY_CONSTRUCTORS` setting is consulted.

    Returns:
        The new expression. When `exp_class` is `exp.Array` and the dialect has distinct
        array constructors, its `bracket_notation` arg records whether `bracket_kind`
        is `TokenType.L_BRACKET`.
    """
    node = exp_class(expressions=args)

    tracks_bracket_notation = (
        exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS
    )
    if tracks_bracket_notation:
        used_brackets = bracket_kind == TokenType.L_BRACKET
        node.set("bracket_notation", used_brackets)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """
    Build an `exp.ConvertTimezone` from the arguments of a CONVERT_TIMEZONE call.

    Args:
        args: The call's arguments. With exactly two, they are read as
            `(target_tz, timestamp)`; any other count is delegated to
            `exp.ConvertTimezone.from_arg_list`.
        default_source_tz: Source time zone used for the two-argument form. If it is
            falsy, `source_tz` is left as None.

    Returns:
        The expression built from the arguments.
    """
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    # Two-argument form: the source time zone is implicit
    source_tz = None
    if default_source_tz:
        source_tz = exp.Literal.string(default_source_tz)

    target_tz = seq_get(args, 0)
    timestamp = seq_get(args, 1)

    return exp.ConvertTimezone(source_tz=source_tz, target_tz=target_tz, timestamp=timestamp)
178class Parser(metaclass=_Parser): 179 """ 180 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 181 182 Args: 183 error_level: The desired error level. 184 Default: ErrorLevel.IMMEDIATE 185 error_message_context: The amount of context to capture from a query string when displaying 186 the error message (in number of characters). 187 Default: 100 188 max_errors: Maximum number of error messages to include in a raised ParseError. 189 This is only relevant if error_level is ErrorLevel.RAISE. 190 Default: 3 191 """ 192 193 FUNCTIONS: t.Dict[str, t.Callable] = { 194 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 195 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 196 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 197 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "CHAR": lambda args: exp.Chr(expressions=args), 204 "CHR": lambda args: exp.Chr(expressions=args), 205 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 206 "CONCAT": lambda args, dialect: exp.Concat( 207 expressions=args, 208 safe=not dialect.STRICT_STRING_CONCAT, 209 coalesce=dialect.CONCAT_COALESCE, 210 ), 211 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 212 expressions=args, 213 safe=not dialect.STRICT_STRING_CONCAT, 214 coalesce=dialect.CONCAT_COALESCE, 215 ), 216 "CONVERT_TIMEZONE": build_convert_timezone, 217 "DATE_TO_DATE_STR": lambda args: exp.Cast( 218 this=seq_get(args, 0), 219 to=exp.DataType(this=exp.DataType.Type.TEXT), 220 ), 221 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 222 start=seq_get(args, 0), 223 end=seq_get(args, 1), 224 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 225 ), 226 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 227 "HEX": build_hex, 228 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 229 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 230 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 231 "LIKE": build_like, 232 "LOG": build_logarithm, 233 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 234 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 235 "LOWER": build_lower, 236 "LPAD": lambda args: build_pad(args), 237 "LEFTPAD": lambda args: build_pad(args), 238 "LTRIM": lambda args: build_trim(args), 239 "MOD": build_mod, 240 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 241 "RPAD": lambda args: build_pad(args, is_left=False), 242 "RTRIM": lambda args: build_trim(args, is_left=False), 243 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 244 if len(args) != 2 245 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 246 "STRPOS": exp.StrPosition.from_arg_list, 247 "CHARINDEX": lambda args: build_locate_strposition(args), 248 "INSTR": exp.StrPosition.from_arg_list, 249 "LOCATE": lambda args: build_locate_strposition(args), 250 "TIME_TO_TIME_STR": lambda args: exp.Cast( 251 this=seq_get(args, 0), 252 to=exp.DataType(this=exp.DataType.Type.TEXT), 253 ), 254 "TO_HEX": build_hex, 255 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 256 this=exp.Cast( 257 this=seq_get(args, 0), 258 to=exp.DataType(this=exp.DataType.Type.TEXT), 259 ), 260 start=exp.Literal.number(1), 261 length=exp.Literal.number(10), 262 ), 263 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 264 "UPPER": build_upper, 265 "VAR_MAP": build_var_map, 266 } 267 268 NO_PAREN_FUNCTIONS = { 269 TokenType.CURRENT_DATE: 
exp.CurrentDate, 270 TokenType.CURRENT_DATETIME: exp.CurrentDate, 271 TokenType.CURRENT_TIME: exp.CurrentTime, 272 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 273 TokenType.CURRENT_USER: exp.CurrentUser, 274 } 275 276 STRUCT_TYPE_TOKENS = { 277 TokenType.NESTED, 278 TokenType.OBJECT, 279 TokenType.STRUCT, 280 TokenType.UNION, 281 } 282 283 NESTED_TYPE_TOKENS = { 284 TokenType.ARRAY, 285 TokenType.LIST, 286 TokenType.LOWCARDINALITY, 287 TokenType.MAP, 288 TokenType.NULLABLE, 289 TokenType.RANGE, 290 *STRUCT_TYPE_TOKENS, 291 } 292 293 ENUM_TYPE_TOKENS = { 294 TokenType.DYNAMIC, 295 TokenType.ENUM, 296 TokenType.ENUM8, 297 TokenType.ENUM16, 298 } 299 300 AGGREGATE_TYPE_TOKENS = { 301 TokenType.AGGREGATEFUNCTION, 302 TokenType.SIMPLEAGGREGATEFUNCTION, 303 } 304 305 TYPE_TOKENS = { 306 TokenType.BIT, 307 TokenType.BOOLEAN, 308 TokenType.TINYINT, 309 TokenType.UTINYINT, 310 TokenType.SMALLINT, 311 TokenType.USMALLINT, 312 TokenType.INT, 313 TokenType.UINT, 314 TokenType.BIGINT, 315 TokenType.UBIGINT, 316 TokenType.INT128, 317 TokenType.UINT128, 318 TokenType.INT256, 319 TokenType.UINT256, 320 TokenType.MEDIUMINT, 321 TokenType.UMEDIUMINT, 322 TokenType.FIXEDSTRING, 323 TokenType.FLOAT, 324 TokenType.DOUBLE, 325 TokenType.UDOUBLE, 326 TokenType.CHAR, 327 TokenType.NCHAR, 328 TokenType.VARCHAR, 329 TokenType.NVARCHAR, 330 TokenType.BPCHAR, 331 TokenType.TEXT, 332 TokenType.MEDIUMTEXT, 333 TokenType.LONGTEXT, 334 TokenType.BLOB, 335 TokenType.MEDIUMBLOB, 336 TokenType.LONGBLOB, 337 TokenType.BINARY, 338 TokenType.VARBINARY, 339 TokenType.JSON, 340 TokenType.JSONB, 341 TokenType.INTERVAL, 342 TokenType.TINYBLOB, 343 TokenType.TINYTEXT, 344 TokenType.TIME, 345 TokenType.TIMETZ, 346 TokenType.TIMESTAMP, 347 TokenType.TIMESTAMP_S, 348 TokenType.TIMESTAMP_MS, 349 TokenType.TIMESTAMP_NS, 350 TokenType.TIMESTAMPTZ, 351 TokenType.TIMESTAMPLTZ, 352 TokenType.TIMESTAMPNTZ, 353 TokenType.DATETIME, 354 TokenType.DATETIME2, 355 TokenType.DATETIME64, 356 TokenType.SMALLDATETIME, 
357 TokenType.DATE, 358 TokenType.DATE32, 359 TokenType.INT4RANGE, 360 TokenType.INT4MULTIRANGE, 361 TokenType.INT8RANGE, 362 TokenType.INT8MULTIRANGE, 363 TokenType.NUMRANGE, 364 TokenType.NUMMULTIRANGE, 365 TokenType.TSRANGE, 366 TokenType.TSMULTIRANGE, 367 TokenType.TSTZRANGE, 368 TokenType.TSTZMULTIRANGE, 369 TokenType.DATERANGE, 370 TokenType.DATEMULTIRANGE, 371 TokenType.DECIMAL, 372 TokenType.DECIMAL32, 373 TokenType.DECIMAL64, 374 TokenType.DECIMAL128, 375 TokenType.DECIMAL256, 376 TokenType.UDECIMAL, 377 TokenType.BIGDECIMAL, 378 TokenType.UUID, 379 TokenType.GEOGRAPHY, 380 TokenType.GEOMETRY, 381 TokenType.POINT, 382 TokenType.RING, 383 TokenType.LINESTRING, 384 TokenType.MULTILINESTRING, 385 TokenType.POLYGON, 386 TokenType.MULTIPOLYGON, 387 TokenType.HLLSKETCH, 388 TokenType.HSTORE, 389 TokenType.PSEUDO_TYPE, 390 TokenType.SUPER, 391 TokenType.SERIAL, 392 TokenType.SMALLSERIAL, 393 TokenType.BIGSERIAL, 394 TokenType.XML, 395 TokenType.YEAR, 396 TokenType.USERDEFINED, 397 TokenType.MONEY, 398 TokenType.SMALLMONEY, 399 TokenType.ROWVERSION, 400 TokenType.IMAGE, 401 TokenType.VARIANT, 402 TokenType.VECTOR, 403 TokenType.VOID, 404 TokenType.OBJECT, 405 TokenType.OBJECT_IDENTIFIER, 406 TokenType.INET, 407 TokenType.IPADDRESS, 408 TokenType.IPPREFIX, 409 TokenType.IPV4, 410 TokenType.IPV6, 411 TokenType.UNKNOWN, 412 TokenType.NOTHING, 413 TokenType.NULL, 414 TokenType.NAME, 415 TokenType.TDIGEST, 416 TokenType.DYNAMIC, 417 *ENUM_TYPE_TOKENS, 418 *NESTED_TYPE_TOKENS, 419 *AGGREGATE_TYPE_TOKENS, 420 } 421 422 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 423 TokenType.BIGINT: TokenType.UBIGINT, 424 TokenType.INT: TokenType.UINT, 425 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 426 TokenType.SMALLINT: TokenType.USMALLINT, 427 TokenType.TINYINT: TokenType.UTINYINT, 428 TokenType.DECIMAL: TokenType.UDECIMAL, 429 TokenType.DOUBLE: TokenType.UDOUBLE, 430 } 431 432 SUBQUERY_PREDICATES = { 433 TokenType.ANY: exp.Any, 434 TokenType.ALL: exp.All, 435 TokenType.EXISTS: exp.Exists, 436 
TokenType.SOME: exp.Any, 437 } 438 439 RESERVED_TOKENS = { 440 *Tokenizer.SINGLE_TOKENS.values(), 441 TokenType.SELECT, 442 } - {TokenType.IDENTIFIER} 443 444 DB_CREATABLES = { 445 TokenType.DATABASE, 446 TokenType.DICTIONARY, 447 TokenType.FILE_FORMAT, 448 TokenType.MODEL, 449 TokenType.NAMESPACE, 450 TokenType.SCHEMA, 451 TokenType.SEQUENCE, 452 TokenType.SINK, 453 TokenType.SOURCE, 454 TokenType.STAGE, 455 TokenType.STORAGE_INTEGRATION, 456 TokenType.STREAMLIT, 457 TokenType.TABLE, 458 TokenType.TAG, 459 TokenType.VIEW, 460 TokenType.WAREHOUSE, 461 } 462 463 CREATABLES = { 464 TokenType.COLUMN, 465 TokenType.CONSTRAINT, 466 TokenType.FOREIGN_KEY, 467 TokenType.FUNCTION, 468 TokenType.INDEX, 469 TokenType.PROCEDURE, 470 *DB_CREATABLES, 471 } 472 473 ALTERABLES = { 474 TokenType.INDEX, 475 TokenType.TABLE, 476 TokenType.VIEW, 477 } 478 479 # Tokens that can represent identifiers 480 ID_VAR_TOKENS = { 481 TokenType.ALL, 482 TokenType.ATTACH, 483 TokenType.VAR, 484 TokenType.ANTI, 485 TokenType.APPLY, 486 TokenType.ASC, 487 TokenType.ASOF, 488 TokenType.AUTO_INCREMENT, 489 TokenType.BEGIN, 490 TokenType.BPCHAR, 491 TokenType.CACHE, 492 TokenType.CASE, 493 TokenType.COLLATE, 494 TokenType.COMMAND, 495 TokenType.COMMENT, 496 TokenType.COMMIT, 497 TokenType.CONSTRAINT, 498 TokenType.COPY, 499 TokenType.CUBE, 500 TokenType.CURRENT_SCHEMA, 501 TokenType.DEFAULT, 502 TokenType.DELETE, 503 TokenType.DESC, 504 TokenType.DESCRIBE, 505 TokenType.DETACH, 506 TokenType.DICTIONARY, 507 TokenType.DIV, 508 TokenType.END, 509 TokenType.EXECUTE, 510 TokenType.EXPORT, 511 TokenType.ESCAPE, 512 TokenType.FALSE, 513 TokenType.FIRST, 514 TokenType.FILTER, 515 TokenType.FINAL, 516 TokenType.FORMAT, 517 TokenType.FULL, 518 TokenType.GET, 519 TokenType.IDENTIFIER, 520 TokenType.IS, 521 TokenType.ISNULL, 522 TokenType.INTERVAL, 523 TokenType.KEEP, 524 TokenType.KILL, 525 TokenType.LEFT, 526 TokenType.LIMIT, 527 TokenType.LOAD, 528 TokenType.MERGE, 529 TokenType.NATURAL, 530 TokenType.NEXT, 
531 TokenType.OFFSET, 532 TokenType.OPERATOR, 533 TokenType.ORDINALITY, 534 TokenType.OVERLAPS, 535 TokenType.OVERWRITE, 536 TokenType.PARTITION, 537 TokenType.PERCENT, 538 TokenType.PIVOT, 539 TokenType.PRAGMA, 540 TokenType.PUT, 541 TokenType.RANGE, 542 TokenType.RECURSIVE, 543 TokenType.REFERENCES, 544 TokenType.REFRESH, 545 TokenType.RENAME, 546 TokenType.REPLACE, 547 TokenType.RIGHT, 548 TokenType.ROLLUP, 549 TokenType.ROW, 550 TokenType.ROWS, 551 TokenType.SEMI, 552 TokenType.SET, 553 TokenType.SETTINGS, 554 TokenType.SHOW, 555 TokenType.TEMPORARY, 556 TokenType.TOP, 557 TokenType.TRUE, 558 TokenType.TRUNCATE, 559 TokenType.UNIQUE, 560 TokenType.UNNEST, 561 TokenType.UNPIVOT, 562 TokenType.UPDATE, 563 TokenType.USE, 564 TokenType.VOLATILE, 565 TokenType.WINDOW, 566 *CREATABLES, 567 *SUBQUERY_PREDICATES, 568 *TYPE_TOKENS, 569 *NO_PAREN_FUNCTIONS, 570 } 571 ID_VAR_TOKENS.remove(TokenType.UNION) 572 573 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 574 TokenType.ANTI, 575 TokenType.APPLY, 576 TokenType.ASOF, 577 TokenType.FULL, 578 TokenType.LEFT, 579 TokenType.LOCK, 580 TokenType.NATURAL, 581 TokenType.RIGHT, 582 TokenType.SEMI, 583 TokenType.WINDOW, 584 } 585 586 ALIAS_TOKENS = ID_VAR_TOKENS 587 588 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 589 590 ARRAY_CONSTRUCTORS = { 591 "ARRAY": exp.Array, 592 "LIST": exp.List, 593 } 594 595 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 596 597 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 598 599 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 600 601 FUNC_TOKENS = { 602 TokenType.COLLATE, 603 TokenType.COMMAND, 604 TokenType.CURRENT_DATE, 605 TokenType.CURRENT_DATETIME, 606 TokenType.CURRENT_SCHEMA, 607 TokenType.CURRENT_TIMESTAMP, 608 TokenType.CURRENT_TIME, 609 TokenType.CURRENT_USER, 610 TokenType.FILTER, 611 TokenType.FIRST, 612 TokenType.FORMAT, 613 TokenType.GET, 614 TokenType.GLOB, 615 TokenType.IDENTIFIER, 616 TokenType.INDEX, 617 TokenType.ISNULL, 618 TokenType.ILIKE, 619 TokenType.INSERT, 
620 TokenType.LIKE, 621 TokenType.MERGE, 622 TokenType.NEXT, 623 TokenType.OFFSET, 624 TokenType.PRIMARY_KEY, 625 TokenType.RANGE, 626 TokenType.REPLACE, 627 TokenType.RLIKE, 628 TokenType.ROW, 629 TokenType.UNNEST, 630 TokenType.VAR, 631 TokenType.LEFT, 632 TokenType.RIGHT, 633 TokenType.SEQUENCE, 634 TokenType.DATE, 635 TokenType.DATETIME, 636 TokenType.TABLE, 637 TokenType.TIMESTAMP, 638 TokenType.TIMESTAMPTZ, 639 TokenType.TRUNCATE, 640 TokenType.WINDOW, 641 TokenType.XOR, 642 *TYPE_TOKENS, 643 *SUBQUERY_PREDICATES, 644 } 645 646 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 647 TokenType.AND: exp.And, 648 } 649 650 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.COLON_EQ: exp.PropertyEQ, 652 } 653 654 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.OR: exp.Or, 656 } 657 658 EQUALITY = { 659 TokenType.EQ: exp.EQ, 660 TokenType.NEQ: exp.NEQ, 661 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 662 } 663 664 COMPARISON = { 665 TokenType.GT: exp.GT, 666 TokenType.GTE: exp.GTE, 667 TokenType.LT: exp.LT, 668 TokenType.LTE: exp.LTE, 669 } 670 671 BITWISE = { 672 TokenType.AMP: exp.BitwiseAnd, 673 TokenType.CARET: exp.BitwiseXor, 674 TokenType.PIPE: exp.BitwiseOr, 675 } 676 677 TERM = { 678 TokenType.DASH: exp.Sub, 679 TokenType.PLUS: exp.Add, 680 TokenType.MOD: exp.Mod, 681 TokenType.COLLATE: exp.Collate, 682 } 683 684 FACTOR = { 685 TokenType.DIV: exp.IntDiv, 686 TokenType.LR_ARROW: exp.Distance, 687 TokenType.SLASH: exp.Div, 688 TokenType.STAR: exp.Mul, 689 } 690 691 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 692 693 TIMES = { 694 TokenType.TIME, 695 TokenType.TIMETZ, 696 } 697 698 TIMESTAMPS = { 699 TokenType.TIMESTAMP, 700 TokenType.TIMESTAMPNTZ, 701 TokenType.TIMESTAMPTZ, 702 TokenType.TIMESTAMPLTZ, 703 *TIMES, 704 } 705 706 SET_OPERATIONS = { 707 TokenType.UNION, 708 TokenType.INTERSECT, 709 TokenType.EXCEPT, 710 } 711 712 JOIN_METHODS = { 713 TokenType.ASOF, 714 TokenType.NATURAL, 715 
TokenType.POSITIONAL, 716 } 717 718 JOIN_SIDES = { 719 TokenType.LEFT, 720 TokenType.RIGHT, 721 TokenType.FULL, 722 } 723 724 JOIN_KINDS = { 725 TokenType.ANTI, 726 TokenType.CROSS, 727 TokenType.INNER, 728 TokenType.OUTER, 729 TokenType.SEMI, 730 TokenType.STRAIGHT_JOIN, 731 } 732 733 JOIN_HINTS: t.Set[str] = set() 734 735 LAMBDAS = { 736 TokenType.ARROW: lambda self, expressions: self.expression( 737 exp.Lambda, 738 this=self._replace_lambda( 739 self._parse_assignment(), 740 expressions, 741 ), 742 expressions=expressions, 743 ), 744 TokenType.FARROW: lambda self, expressions: self.expression( 745 exp.Kwarg, 746 this=exp.var(expressions[0].name), 747 expression=self._parse_assignment(), 748 ), 749 } 750 751 COLUMN_OPERATORS = { 752 TokenType.DOT: None, 753 TokenType.DOTCOLON: lambda self, this, to: self.expression( 754 exp.JSONCast, 755 this=this, 756 to=to, 757 ), 758 TokenType.DCOLON: lambda self, this, to: self.expression( 759 exp.Cast if self.STRICT_CAST else exp.TryCast, 760 this=this, 761 to=to, 762 ), 763 TokenType.ARROW: lambda self, this, path: self.expression( 764 exp.JSONExtract, 765 this=this, 766 expression=self.dialect.to_json_path(path), 767 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 768 ), 769 TokenType.DARROW: lambda self, this, path: self.expression( 770 exp.JSONExtractScalar, 771 this=this, 772 expression=self.dialect.to_json_path(path), 773 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 774 ), 775 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 776 exp.JSONBExtract, 777 this=this, 778 expression=path, 779 ), 780 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 781 exp.JSONBExtractScalar, 782 this=this, 783 expression=path, 784 ), 785 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 786 exp.JSONBContains, 787 this=this, 788 expression=key, 789 ), 790 } 791 792 EXPRESSION_PARSERS = { 793 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 794 exp.Column: 
lambda self: self._parse_column(), 795 exp.Condition: lambda self: self._parse_assignment(), 796 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 797 exp.Expression: lambda self: self._parse_expression(), 798 exp.From: lambda self: self._parse_from(joins=True), 799 exp.Group: lambda self: self._parse_group(), 800 exp.Having: lambda self: self._parse_having(), 801 exp.Hint: lambda self: self._parse_hint_body(), 802 exp.Identifier: lambda self: self._parse_id_var(), 803 exp.Join: lambda self: self._parse_join(), 804 exp.Lambda: lambda self: self._parse_lambda(), 805 exp.Lateral: lambda self: self._parse_lateral(), 806 exp.Limit: lambda self: self._parse_limit(), 807 exp.Offset: lambda self: self._parse_offset(), 808 exp.Order: lambda self: self._parse_order(), 809 exp.Ordered: lambda self: self._parse_ordered(), 810 exp.Properties: lambda self: self._parse_properties(), 811 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 812 exp.Qualify: lambda self: self._parse_qualify(), 813 exp.Returning: lambda self: self._parse_returning(), 814 exp.Select: lambda self: self._parse_select(), 815 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 816 exp.Table: lambda self: self._parse_table_parts(), 817 exp.TableAlias: lambda self: self._parse_table_alias(), 818 exp.Tuple: lambda self: self._parse_value(values=False), 819 exp.Whens: lambda self: self._parse_when_matched(), 820 exp.Where: lambda self: self._parse_where(), 821 exp.Window: lambda self: self._parse_named_window(), 822 exp.With: lambda self: self._parse_with(), 823 "JOIN_TYPE": lambda self: self._parse_join_parts(), 824 } 825 826 STATEMENT_PARSERS = { 827 TokenType.ALTER: lambda self: self._parse_alter(), 828 TokenType.ANALYZE: lambda self: self._parse_analyze(), 829 TokenType.BEGIN: lambda self: self._parse_transaction(), 830 TokenType.CACHE: lambda self: self._parse_cache(), 831 TokenType.COMMENT: lambda self: self._parse_comment(), 832 
TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 833 TokenType.COPY: lambda self: self._parse_copy(), 834 TokenType.CREATE: lambda self: self._parse_create(), 835 TokenType.DELETE: lambda self: self._parse_delete(), 836 TokenType.DESC: lambda self: self._parse_describe(), 837 TokenType.DESCRIBE: lambda self: self._parse_describe(), 838 TokenType.DROP: lambda self: self._parse_drop(), 839 TokenType.GRANT: lambda self: self._parse_grant(), 840 TokenType.INSERT: lambda self: self._parse_insert(), 841 TokenType.KILL: lambda self: self._parse_kill(), 842 TokenType.LOAD: lambda self: self._parse_load(), 843 TokenType.MERGE: lambda self: self._parse_merge(), 844 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 845 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 846 TokenType.REFRESH: lambda self: self._parse_refresh(), 847 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 848 TokenType.SET: lambda self: self._parse_set(), 849 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 850 TokenType.UNCACHE: lambda self: self._parse_uncache(), 851 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 852 TokenType.UPDATE: lambda self: self._parse_update(), 853 TokenType.USE: lambda self: self._parse_use(), 854 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 855 } 856 857 UNARY_PARSERS = { 858 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 859 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 860 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 861 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 862 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 863 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 864 } 865 866 STRING_PARSERS = { 867 
TokenType.HEREDOC_STRING: lambda self, token: self.expression( 868 exp.RawString, this=token.text 869 ), 870 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 871 exp.National, this=token.text 872 ), 873 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 874 TokenType.STRING: lambda self, token: self.expression( 875 exp.Literal, this=token.text, is_string=True 876 ), 877 TokenType.UNICODE_STRING: lambda self, token: self.expression( 878 exp.UnicodeString, 879 this=token.text, 880 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 881 ), 882 } 883 884 NUMERIC_PARSERS = { 885 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 886 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 887 TokenType.HEX_STRING: lambda self, token: self.expression( 888 exp.HexString, 889 this=token.text, 890 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 891 ), 892 TokenType.NUMBER: lambda self, token: self.expression( 893 exp.Literal, this=token.text, is_string=False 894 ), 895 } 896 897 PRIMARY_PARSERS = { 898 **STRING_PARSERS, 899 **NUMERIC_PARSERS, 900 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 901 TokenType.NULL: lambda self, _: self.expression(exp.Null), 902 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 903 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 904 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 905 TokenType.STAR: lambda self, _: self._parse_star_ops(), 906 } 907 908 PLACEHOLDER_PARSERS = { 909 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 910 TokenType.PARAMETER: lambda self: self._parse_parameter(), 911 TokenType.COLON: lambda self: ( 912 self.expression(exp.Placeholder, this=self._prev.text) 913 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 914 else None 915 ), 916 } 917 
# Token type -> callable(self, this) that parses the remainder of a range-style
# predicate whose left operand `this` has already been parsed. Entries built with
# `binary_range_parser` parse the right operand with `_parse_bitwise`, build the
# given expression type and pass it through `_parse_escape`.
RANGE_PARSERS = {
    TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
    TokenType.BETWEEN: lambda self, this: self._parse_between(this),
    TokenType.GLOB: binary_range_parser(exp.Glob),
    TokenType.ILIKE: binary_range_parser(exp.ILike),
    TokenType.IN: lambda self, this: self._parse_in(this),
    TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
    TokenType.IS: lambda self, this: self._parse_is(this),
    TokenType.LIKE: binary_range_parser(exp.Like),
    # Same node type as AT_GT, but with the two operands swapped.
    TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
    TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
    TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
    TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    TokenType.FOR: lambda self, this: self._parse_comprehension(this),
}

# Upper-case property keyword (possibly multi-word) -> callable that parses that
# property. Every callable takes `self`; some also accept `**kwargs`, which they
# forward to the underlying `_parse_*` method.
# NOTE(review): presumably the keyword itself has been consumed before the
# callable runs -- confirm against the caller, which is outside this chunk.
PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
    "ALLOWED_VALUES": lambda self: self.expression(
        exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
    ),
    "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
    "AUTO": lambda self: self._parse_auto_property(),
    "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
    "BACKUP": lambda self: self.expression(
        exp.BackupProperty, this=self._parse_var(any_token=True)
    ),
    "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
    "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
    "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
    "CHECKSUM": lambda self: self._parse_checksum(),
    "CLUSTER BY": lambda self: self._parse_cluster(),
    "CLUSTERED": lambda self: self._parse_clustered_by(),
    "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
        exp.CollateProperty, **kwargs
    ),
    "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
    "CONTAINS": lambda self: self._parse_contains_property(),
    "COPY": lambda self: self._parse_copy_property(),
    "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
    "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
    "DEFINER": lambda self: self._parse_definer(),
    # DETERMINISTIC produces the same node as IMMUTABLE below.
    "DETERMINISTIC": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
    ),
    "DISTRIBUTED": lambda self: self._parse_distributed_property(),
    "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
    "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
    "DISTKEY": lambda self: self._parse_distkey(),
    "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
    "EMPTY": lambda self: self.expression(exp.EmptyProperty),
    "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
    "ENVIRONMENT": lambda self: self.expression(
        exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
    ),
    "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
    "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
    "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
    "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "FREESPACE": lambda self: self._parse_freespace(),
    "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
    "HEAP": lambda self: self.expression(exp.HeapProperty),
    "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
    "IMMUTABLE": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
    ),
    "INHERITS": lambda self: self.expression(
        exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
    ),
    "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
    "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
    "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
    "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
    "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
    "LIKE": lambda self: self._parse_create_like(),
    "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
    "LOCK": lambda self: self._parse_locking(),
    "LOCKING": lambda self: self._parse_locking(),
    "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
    "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
    "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
    "MODIFIES": lambda self: self._parse_modifies_property(),
    "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
    "NO": lambda self: self._parse_no_property(),
    "ON": lambda self: self._parse_on_property(),
    "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
    "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
    "PARTITION": lambda self: self._parse_partitioned_of(),
    "PARTITION BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
    "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
    "READS": lambda self: self._parse_reads_property(),
    "REMOTE": lambda self: self._parse_remote_with_connection(),
    "RETURNS": lambda self: self._parse_returns(),
    "STRICT": lambda self: self.expression(exp.StrictProperty),
    "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
    "ROW": lambda self: self._parse_row(),
    "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
    # `this` is only parsed when BY follows; otherwise it is the falsy result
    # of `_match_text_seq`.
    "SAMPLE": lambda self: self.expression(
        exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
    ),
    "SECURE": lambda self: self.expression(exp.SecureProperty),
    "SECURITY": lambda self: self._parse_security(),
    "SET": lambda self: self.expression(exp.SetProperty, multi=False),
    "SETTINGS": lambda self: self._parse_settings_property(),
    "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
    "SORTKEY": lambda self: self._parse_sortkey(),
    "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
    "STABLE": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("STABLE")
    ),
    "STORED": lambda self: self._parse_stored(),
    "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
    "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
    "TEMP": lambda self: self.expression(exp.TemporaryProperty),
    "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
    "TO": lambda self: self._parse_to_table(),
    "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
    "TRANSFORM": lambda self: self.expression(
        exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
    ),
    "TTL": lambda self: self._parse_ttl(),
    "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
    "VOLATILE": lambda self: self._parse_volatile_property(),
    "WITH": lambda self: self._parse_with_property(),
}

# Upper-case constraint keyword (possibly multi-word) -> callable(self) that
# parses the constraint introduced by that keyword.
CONSTRAINT_PARSERS = {
    "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
    "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
    "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
    "CHARACTER SET": lambda self: self.expression(
        exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
    ),
    "CHECK": lambda self: self.expression(
        exp.CheckColumnConstraint,
        this=self._parse_wrapped(self._parse_assignment),
        enforced=self._match_text_seq("ENFORCED"),
    ),
    "COLLATE": lambda self: self.expression(
        exp.CollateColumnConstraint,
        this=self._parse_identifier() or self._parse_column(),
    ),
    "COMMENT": lambda self: self.expression(
        exp.CommentColumnConstraint, this=self._parse_string()
    ),
    "COMPRESS": lambda self: self._parse_compress(),
    "CLUSTERED": lambda self: self.expression(
        exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
    ),
    "NONCLUSTERED": lambda self: self.expression(
        exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
    ),
    "DEFAULT": lambda self: self.expression(
        exp.DefaultColumnConstraint, this=self._parse_bitwise()
    ),
    "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
    "EPHEMERAL": lambda self: self.expression(
        exp.EphemeralColumnConstraint, this=self._parse_bitwise()
    ),
    "EXCLUDE": lambda self: self.expression(
        exp.ExcludeColumnConstraint, this=self._parse_index_params()
    ),
    "FOREIGN KEY": lambda self: self._parse_foreign_key(),
    "FORMAT": lambda self: self.expression(
        exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
    ),
    "GENERATED": lambda self: self._parse_generated_as_identity(),
    "IDENTITY": lambda self: self._parse_auto_increment(),
    "INLINE": lambda self: self._parse_inline(),
    "LIKE": lambda self: self._parse_create_like(),
    "NOT": lambda self: self._parse_not_constraint(),
    "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
    # ON UPDATE <function> yields an OnUpdateColumnConstraint; any other ON
    # falls through to an OnProperty built from the following identifier.
    "ON": lambda self: (
        self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
    )
    or self.expression(exp.OnProperty, this=self._parse_id_var()),
    "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
    "PERIOD": lambda self: self._parse_period_for_system_time(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(),
    "REFERENCES": lambda self: self._parse_references(match=False),
    "TITLE": lambda self: self.expression(
        exp.TitleColumnConstraint, this=self._parse_var_or_string()
    ),
    "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
    "UNIQUE": lambda self: self._parse_unique(),
    "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    # Both parts are optional: each is parsed only if its introducing token
    # (FOR / ALIAS) is matched first.
    "WATERMARK": lambda self: self.expression(
        exp.WatermarkColumnConstraint,
        this=self._match(TokenType.FOR) and self._parse_column(),
        expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
    ),
    "WITH": lambda self: self.expression(
        exp.Properties, expressions=self._parse_wrapped_properties()
    ),
    "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
}

def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
    """
    Parses the parenthesized arguments of a BUCKET / TRUNCATE partition transform.

    The node class is chosen from the text of the previously consumed token:
    "BUCKET" (case-insensitive) gives `exp.PartitionedByBucket`, anything else
    gives `exp.PartitionByTruncate`.

    Returns:
        The transform node, with the first two parsed arguments stored as `this`
        and `expression` (swapped when the first argument is a literal).
    """
    klass = (
        exp.PartitionedByBucket
        if self._prev.text.upper() == "BUCKET"
        else exp.PartitionByTruncate
    )

    # Each argument is parsed as a primary first, falling back to a column.
    args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
    # seq_get is used so that a missing argument does not raise an IndexError here.
    this, expression = seq_get(args, 0), seq_get(args, 1)

    if isinstance(this, exp.Literal):
        # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
        #  - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
        #  - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
        # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
        #
        # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
        # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
        this, expression = expression, this

    return self.expression(klass, this=this, expression=expression)

# Keyword that follows ALTER <object> -> callable(self) that parses that action.
ALTER_PARSERS = {
    "ADD": lambda self: self._parse_alter_table_add(),
    "AS": lambda self: self._parse_select(),
    "ALTER": lambda self: self._parse_alter_table_alter(),
    "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
    "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
    "DROP": lambda self: self._parse_alter_table_drop(),
    "RENAME": lambda self: self._parse_alter_table_rename(),
    "SET": lambda self: self._parse_alter_table_set(),
    "SWAP": lambda self: self.expression(
        exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
    ),
}

# Keyword -> callable(self) for the DISTKEY / DISTSTYLE / SORTKEY family of
# alterations. NOTE(review): presumably consulted after ALTER ... ALTER -- confirm.
ALTER_ALTER_PARSERS = {
    "DISTKEY": lambda self: self._parse_alter_diststyle(),
    "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
    "SORTKEY": lambda self: self._parse_alter_sortkey(),
    "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
}

# Constraint keywords; every entry is also a key of CONSTRAINT_PARSERS.
# NOTE(review): the name suggests these may appear in a schema without a
# CONSTRAINT <name> prefix -- confirm against the code that reads this set.
SCHEMA_UNNAMED_CONSTRAINTS = {
    "CHECK",
    "EXCLUDE",
    "FOREIGN KEY",
    "LIKE",
    "PERIOD",
    "PRIMARY KEY",
    "UNIQUE",
    "WATERMARK",
    "BUCKET",
    "TRUNCATE",
}

# Keyword -> callable(self) for constructs that, per the table's name, are
# parsed like functions but written without parentheses.
NO_PAREN_FUNCTION_PARSERS = {
    "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    "CASE": lambda self: self._parse_case(),
    "CONNECT_BY_ROOT": lambda self: self.expression(
        exp.ConnectByRoot, this=self._parse_column()
    ),
    "IF": lambda self: self._parse_if(),
}

# Token types that, per the name, are not accepted as a function name.
INVALID_FUNC_NAME_TOKENS = {
    TokenType.IDENTIFIER,
    TokenType.STRING,
}

# Function names whose arguments may carry aliases (per the name; usage is not
# visible in this chunk).
FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
# Expression classes that, per the name, represent a key/value style definition.
KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

# Function name -> callable(self) for functions whose argument list needs a
# dedicated parser.
FUNCTION_PARSERS = {
    # Every SQL name of ArgMax / ArgMin maps to the same parser; the lambdas
    # deliberately do not capture `name`, so late binding is not an issue.
    **{
        name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
    },
    **{
        name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
    },
    "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
    "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
    "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
    "DECODE": lambda self: self._parse_decode(),
    "EXTRACT": lambda self: self._parse_extract(),
    "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
    "GAP_FILL": lambda self: self._parse_gap_fill(),
    "JSON_OBJECT": lambda self: self._parse_json_object(),
    "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
    "JSON_TABLE": lambda self: self._parse_json_table(),
    "MATCH": lambda self: self._parse_match_against(),
    "NORMALIZE": lambda self: self._parse_normalize(),
    "OPENJSON": lambda self: self._parse_open_json(),
    "OVERLAY": lambda self: self._parse_overlay(),
    "POSITION": lambda self: self._parse_position(),
    "PREDICT": lambda self: self._parse_predict(),
    "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
    "STRING_AGG": lambda self: self._parse_string_agg(),
    "SUBSTRING": lambda self: self._parse_substring(),
    "TRIM": lambda self: self._parse_trim(),
    "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
    "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    "XMLELEMENT": lambda self: self.expression(
        exp.XMLElement,
        this=self._match_text_seq("NAME") and self._parse_id_var(),
        expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
    ),
    "XMLTABLE": lambda self: self._parse_xml_table(),
}

# Token type -> callable(self) returning a `(key, parsed_node)` pair. Several
# tokens share a key (LIMIT/FETCH -> "limit", FOR/LOCK -> "locks", etc.).
# NOTE(review): presumably `key` is the arg name the node is stored under on the
# query being modified -- confirm against the caller.
QUERY_MODIFIER_PARSERS = {
    TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
    TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
    TokenType.WHERE: lambda self: ("where", self._parse_where()),
    TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
    TokenType.HAVING: lambda self: ("having", self._parse_having()),
    TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
    TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
    TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
    TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
    TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
    TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
    TokenType.FOR: lambda self: ("locks", self._parse_locks()),
    TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
    TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.CLUSTER_BY: lambda self: (
        "cluster",
        self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
    ),
    TokenType.DISTRIBUTE_BY: lambda self: (
        "distribute",
        self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
    ),
    TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
    TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
}

# Scope keyword following SET -> callable(self) that parses the SET item.
SET_PARSERS = {
    "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
    "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
    "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
    "TRANSACTION": lambda self: self._parse_set_transaction(),
}

# Empty in this class. NOTE(review): presumably populated by dialect subclasses.
SHOW_PARSERS: t.Dict[str, t.Callable] = {}

# Data type -> callable(self, this, <ignored>) that builds a node for a literal
# of that type.
TYPE_LITERAL_PARSERS = {
    exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
}

# Data type -> callable(DataType) -> DataType; empty in this class.
TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
# Leading keyword -> accepted continuations; a continuation is either a single
# keyword or a sequence of keywords (see OPTIONS_TYPE).
TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
    "ISOLATION": (
        ("LEVEL", "REPEATABLE", "READ"),
        ("LEVEL", "READ", "COMMITTED"),
        # Standard spelling of the isolation level.
        ("LEVEL", "READ", "UNCOMMITTED"),
        # Historical misspelling, kept so input that relied on it behaves as before.
        ("LEVEL", "READ", "UNCOMITTED"),
        ("LEVEL", "SERIALIZABLE"),
    ),
    "READ": ("WRITE", "ONLY"),
}

# Keywords mapped to an empty tuple take no continuation keyword.
CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
    ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
)
CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

CREATE_SEQUENCE: OPTIONS_TYPE = {
    "SCALE": ("EXTEND", "NOEXTEND"),
    "SHARD": ("EXTEND", "NOEXTEND"),
    "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
    **dict.fromkeys(
        (
            "SESSION",
            "GLOBAL",
            "KEEP",
            "NOKEEP",
            "ORDER",
            "NOORDER",
            "NOCACHE",
            "CYCLE",
            "NOCYCLE",
            "NOMINVALUE",
            "NOMAXVALUE",
            "NOSCALE",
            "NOSHARD",
        ),
        tuple(),
    ),
}

ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

USABLES: OPTIONS_TYPE = dict.fromkeys(
    ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
)

CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
    "TYPE": ("EVOLUTION",),
    **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
}

PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
    "NOT": ("ENFORCED",),
    "MATCH": (
        "FULL",
        "PARTIAL",
        "SIMPLE",
    ),
    "INITIALLY": ("DEFERRED", "IMMEDIATE"),
    "USING": (
        "BTREE",
        "HASH",
    ),
    **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
}

WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
    "NO": ("OTHERS",),
    "CURRENT": ("ROW",),
    **dict.fromkeys(("GROUP", "TIES"), tuple()),
}

INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

CLONE_KEYWORDS = {"CLONE", "COPY"}
HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

# ROWS is excluded from the tokens usable as a window alias.
WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

ADD_CONSTRAINT_TOKENS = {
    TokenType.CONSTRAINT,
    TokenType.FOREIGN_KEY,
    TokenType.INDEX,
    TokenType.KEY,
    TokenType.PRIMARY_KEY,
    TokenType.UNIQUE,
}

DISTINCT_TOKENS = {TokenType.DISTINCT}

NULL_TOKENS = {TokenType.NULL}

UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

# ODBC literal prefix -> expression class.
ODBC_DATETIME_LITERALS = {
    "d": exp.Date,
    "t": exp.Time,
    "ts": exp.Timestamp,
}

ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

# The style options for the DESCRIBE statement
DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

# The style options for the ANALYZE statement
ANALYZE_STYLES = {
    "BUFFER_USAGE_LIMIT",
    "FULL",
    "LOCAL",
    "NO_WRITE_TO_BINLOG",
    "SAMPLE",
    "SKIP_LOCKED",
    "VERBOSE",
}

# Keyword -> callable(self) that parses the matching ANALYZE sub-expression.
ANALYZE_EXPRESSION_PARSERS = {
    "ALL": lambda self: self._parse_analyze_columns(),
    "COMPUTE": lambda self: self._parse_analyze_statistics(),
    "DELETE": lambda self: self._parse_analyze_delete(),
    "DROP": lambda self: self._parse_analyze_histogram(),
    "ESTIMATE": lambda self: self._parse_analyze_statistics(),
    "LIST": lambda self: self._parse_analyze_list(),
    "PREDICATE": lambda self: self._parse_analyze_columns(),
    "UPDATE": lambda self: self._parse_analyze_histogram(),
    "VALIDATE": lambda self: self._parse_analyze_validate(),
}

PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

OPERATION_MODIFIERS: t.Set[str] = set()

RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

# Passed to _parse_cast / _parse_convert by the CAST and CONVERT entries of
# FUNCTION_PARSERS.
STRICT_CAST = True

PREFIXED_PIVOT_COLUMNS = False
IDENTIFY_PIVOT_STRINGS = False

LOG_DEFAULTS_TO_LN = False

# Whether ADD is present for each column added by ALTER TABLE
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

# Whether the table sample clause expects CSV syntax
TABLESAMPLE_CSV = False

# The default method used for table sampling
DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

# Whether the SET command needs a delimiter
(e.g. "=") for assignments 1463 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1464 1465 # Whether the TRIM function expects the characters to trim as its first argument 1466 TRIM_PATTERN_FIRST = False 1467 1468 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1469 STRING_ALIASES = False 1470 1471 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1472 MODIFIERS_ATTACHED_TO_SET_OP = True 1473 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1474 1475 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1476 NO_PAREN_IF_COMMANDS = True 1477 1478 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1479 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1480 1481 # Whether the `:` operator is used to extract a value from a VARIANT column 1482 COLON_IS_VARIANT_EXTRACT = False 1483 1484 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1485 # If this is True and '(' is not found, the keyword will be treated as an identifier 1486 VALUES_FOLLOWED_BY_PAREN = True 1487 1488 # Whether implicit unnesting is supported, e.g. 
def reset(self):
    """Returns the parser to its initial state: no SQL, no errors, no tokens and no cursor."""
    # Source text and the diagnostics recorded for it
    self.sql = ""
    self.errors = []

    # Token stream and the cursor position within it
    self._tokens = []
    self._index = 0

    # Tokens surrounding the cursor, plus the comments attached to the previous token
    self._curr = self._next = self._prev = self._prev_comments = None
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees produced by the first parser that succeeds.

    Raises:
        TypeError: If no parser is registered for one of the expression types.
        ParseError: If the tokens can't be parsed into any of the expression types.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Only annotate the error if it actually carries structured details
            if e.errors:
                e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # `errors` is empty when no expression types were supplied, so there's nothing to chain from
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from (errors[-1] if errors else None)
def check_errors(self) -> None:
    """Reports the recorded errors: logs them under WARN, raises them as one ParseError under RAISE."""
    level = self.error_level

    if level == ErrorLevel.WARN:
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    # Every other level is a no-op here, and so is RAISE when nothing was recorded
    if level != ErrorLevel.RAISE or not self.errors:
        return

    raise ParseError(
        concat_messages(self.errors, self.max_errors),
        errors=merge_errors(self.errors),
    )
1687 kwargs: The arguments to set for the expression along with their respective values. 1688 1689 Returns: 1690 The target expression. 1691 """ 1692 instance = exp_class(**kwargs) 1693 instance.add_comments(comments) if comments else self._add_comments(instance) 1694 return self.validate_expression(instance) 1695 1696 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1697 if expression and self._prev_comments: 1698 expression.add_comments(self._prev_comments) 1699 self._prev_comments = None 1700 1701 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1702 """ 1703 Validates an Expression, making sure that all its mandatory arguments are set. 1704 1705 Args: 1706 expression: The expression to validate. 1707 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1708 1709 Returns: 1710 The validated expression. 1711 """ 1712 if self.error_level != ErrorLevel.IGNORE: 1713 for error_message in expression.error_messages(args): 1714 self.raise_error(error_message) 1715 1716 return expression 1717 1718 def _find_sql(self, start: Token, end: Token) -> str: 1719 return self.sql[start.start : end.end + 1] 1720 1721 def _is_connected(self) -> bool: 1722 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1723 1724 def _advance(self, times: int = 1) -> None: 1725 self._index += times 1726 self._curr = seq_get(self._tokens, self._index) 1727 self._next = seq_get(self._tokens, self._index + 1) 1728 1729 if self._index > 0: 1730 self._prev = self._tokens[self._index - 1] 1731 self._prev_comments = self._prev.comments 1732 else: 1733 self._prev = None 1734 self._prev_comments = None 1735 1736 def _retreat(self, index: int) -> None: 1737 if index != self._index: 1738 self._advance(index - self._index) 1739 1740 def _warn_unsupported(self) -> None: 1741 if len(self._tokens) <= 1: 1742 return 1743 1744 # We use _find_sql because self.sql may comprise multiple chunks, 
def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
    """
    Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
    This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
    solve this by setting & resetting the parser state accordingly.

    Args:
        parse_method: The zero-argument parse function to attempt.
        retreat: Whether to rewind the token cursor even if parsing succeeds.

    Returns:
        The value produced by `parse_method`, or None if it raised a ParseError.
    """
    index = self._index
    error_level = self.error_level

    # Bound up front so the `finally` clause can inspect it even if `parse_method` raises
    # something other than a ParseError; otherwise an UnboundLocalError would mask the
    # original exception and the error level would never be restored.
    this = None

    # Force errors to be raised immediately so that a failed attempt can be detected
    self.error_level = ErrorLevel.IMMEDIATE
    try:
        this = parse_method()
    except ParseError:
        this = None
    finally:
        if not this or retreat:
            self._retreat(index)
        self.error_level = error_level

    return this
# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
def _parse_ttl(self) -> exp.Expression:
    """Parses a comma-separated list of TTL actions, followed by optional WHERE, GROUP BY and SET clauses."""

    def _parse_ttl_action() -> t.Optional[exp.Expression]:
        ttl_expr = self._parse_bitwise()

        # Work out which action (if any) trails the TTL expression
        if self._match_text_seq("DELETE"):
            action = {"delete": True}
        elif self._match_text_seq("RECOMPRESS"):
            action = {"recompress": self._parse_bitwise()}
        elif self._match_text_seq("TO", "DISK"):
            action = {"to_disk": self._parse_string()}
        elif self._match_text_seq("TO", "VOLUME"):
            action = {"to_volume": self._parse_string()}
        else:
            # A bare expression without an action
            return ttl_expr

        return self.expression(exp.MergeTreeTTLAction, this=ttl_expr, **action)

    actions = self._parse_csv(_parse_ttl_action)
    where_clause = self._parse_where()
    group_clause = self._parse_group()

    # The SET clause is only considered after a GROUP BY
    if group_clause and self._match(TokenType.SET):
        set_items = self._parse_csv(self._parse_set_item)
    else:
        set_items = None

    return self.expression(
        exp.MergeTreeTTL,
        expressions=actions,
        where=where_clause,
        group=group_clause,
        aggregates=set_items,
    )
def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
    """
    Parses a DROP statement, assuming the DROP keyword has already been consumed.

    Args:
        exists: Whether the IF EXISTS clause should be treated as already present.

    Returns:
        The parsed Drop expression, or a Command if the kind of the dropped object
        isn't one of the known creatables.
    """
    start = self._prev
    temporary = self._match(TokenType.TEMPORARY)
    materialized = self._match_text_seq("MATERIALIZED")

    # Keep the creatable's token around: CONCURRENTLY and IF EXISTS may advance the cursor,
    # after which self._prev no longer points at it
    kind_token = self._match_set(self.CREATABLES) and self._prev
    if not kind_token:
        return self._parse_as_command(start)

    kind = kind_token.text.upper()

    concurrently = self._match_text_seq("CONCURRENTLY")
    if_exists = exists or self._parse_exists()

    if kind == "COLUMN":
        this = self._parse_column()
    else:
        this = self._parse_table_parts(
            schema=True, is_db_reference=kind_token.token_type == TokenType.SCHEMA
        )

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    # An optional parenthesized list of types may follow the dropped object's name
    if self._match(TokenType.L_PAREN, advance=False):
        expressions = self._parse_wrapped_csv(self._parse_types)
    else:
        expressions = None

    return self.expression(
        exp.Drop,
        exists=if_exists,
        this=this,
        expressions=expressions,
        kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
        temporary=temporary,
        materialized=materialized,
        cascade=self._match_text_seq("CASCADE"),
        constraints=self._match_text_seq("CONSTRAINTS"),
        purge=self._match_text_seq("PURGE"),
        cluster=cluster,
        concurrently=concurrently,
    )
a statement parser 1924 start = self._prev 1925 1926 replace = ( 1927 start.token_type == TokenType.REPLACE 1928 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1929 or self._match_pair(TokenType.OR, TokenType.ALTER) 1930 ) 1931 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1932 1933 unique = self._match(TokenType.UNIQUE) 1934 1935 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1936 clustered = True 1937 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1938 "COLUMNSTORE" 1939 ): 1940 clustered = False 1941 else: 1942 clustered = None 1943 1944 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1945 self._advance() 1946 1947 properties = None 1948 create_token = self._match_set(self.CREATABLES) and self._prev 1949 1950 if not create_token: 1951 # exp.Properties.Location.POST_CREATE 1952 properties = self._parse_properties() 1953 create_token = self._match_set(self.CREATABLES) and self._prev 1954 1955 if not properties or not create_token: 1956 return self._parse_as_command(start) 1957 1958 concurrently = self._match_text_seq("CONCURRENTLY") 1959 exists = self._parse_exists(not_=True) 1960 this = None 1961 expression: t.Optional[exp.Expression] = None 1962 indexes = None 1963 no_schema_binding = None 1964 begin = None 1965 end = None 1966 clone = None 1967 1968 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1969 nonlocal properties 1970 if properties and temp_props: 1971 properties.expressions.extend(temp_props.expressions) 1972 elif temp_props: 1973 properties = temp_props 1974 1975 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1976 this = self._parse_user_defined_function(kind=create_token.token_type) 1977 1978 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1979 extend_props(self._parse_properties()) 1980 1981 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1982 
extend_props(self._parse_properties()) 1983 1984 if not expression: 1985 if self._match(TokenType.COMMAND): 1986 expression = self._parse_as_command(self._prev) 1987 else: 1988 begin = self._match(TokenType.BEGIN) 1989 return_ = self._match_text_seq("RETURN") 1990 1991 if self._match(TokenType.STRING, advance=False): 1992 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1993 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1994 expression = self._parse_string() 1995 extend_props(self._parse_properties()) 1996 else: 1997 expression = self._parse_user_defined_function_expression() 1998 1999 end = self._match_text_seq("END") 2000 2001 if return_: 2002 expression = self.expression(exp.Return, this=expression) 2003 elif create_token.token_type == TokenType.INDEX: 2004 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2005 if not self._match(TokenType.ON): 2006 index = self._parse_id_var() 2007 anonymous = False 2008 else: 2009 index = None 2010 anonymous = True 2011 2012 this = self._parse_index(index=index, anonymous=anonymous) 2013 elif create_token.token_type in self.DB_CREATABLES: 2014 table_parts = self._parse_table_parts( 2015 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2016 ) 2017 2018 # exp.Properties.Location.POST_NAME 2019 self._match(TokenType.COMMA) 2020 extend_props(self._parse_properties(before=True)) 2021 2022 this = self._parse_schema(this=table_parts) 2023 2024 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2025 extend_props(self._parse_properties()) 2026 2027 has_alias = self._match(TokenType.ALIAS) 2028 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2029 # exp.Properties.Location.POST_ALIAS 2030 extend_props(self._parse_properties()) 2031 2032 if create_token.token_type == TokenType.SEQUENCE: 2033 expression = self._parse_types() 2034 extend_props(self._parse_properties()) 
def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
    """
    Parses a run of sequence options into a single SequenceProperties node.

    The recognized options are INCREMENT [BY] [=], MINVALUE, MAXVALUE, START [WITH] [=],
    CACHE, OWNED BY, as well as any keyword accepted by `CREATE_SEQUENCE`. Parsing stops
    at the first token that isn't one of these.

    Returns:
        The populated SequenceProperties node, or None if the cursor didn't move.
    """
    seq = exp.SequenceProperties()

    options = []
    # Remember where we started, so we can tell whether anything was consumed
    index = self._index

    while self._curr:
        # Options may optionally be separated by commas
        # NOTE(review): presumably a lone comma also moves the cursor, in which case an
        # empty node is returned instead of None -- confirm this is intended
        self._match(TokenType.COMMA)
        if self._match_text_seq("INCREMENT"):
            self._match_text_seq("BY")
            self._match_text_seq("=")
            seq.set("increment", self._parse_term())
        elif self._match_text_seq("MINVALUE"):
            seq.set("minvalue", self._parse_term())
        elif self._match_text_seq("MAXVALUE"):
            seq.set("maxvalue", self._parse_term())
        elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
            self._match_text_seq("=")
            seq.set("start", self._parse_term())
        elif self._match_text_seq("CACHE"):
            # T-SQL allows empty CACHE which is initialized dynamically
            seq.set("cache", self._parse_number() or True)
        elif self._match_text_seq("OWNED", "BY"):
            # "OWNED BY NONE" is the default
            seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
        else:
            # Fall back to the keyword-only options; stop at the first unrecognized token
            opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
            if opt:
                options.append(opt)
            else:
                break

    seq.set("options", options if options else None)
    return None if self._index == index else seq
def _parse_property(self) -> t.Optional[exp.Expression]:
    """
    Parses a single property.

    Registered property parsers are tried first (optionally prefixed by DEFAULT), then the
    COMPOUND SORTKEY and SQL SECURITY forms, then a generic `key = value` assignment. If
    none of these apply, the tokens are parsed as sequence properties.
    """
    parsers = self.PROPERTY_PARSERS

    if self._match_texts(parsers):
        return parsers[self._prev.text.upper()](self)

    if self._match(TokenType.DEFAULT) and self._match_texts(parsers):
        return parsers[self._prev.text.upper()](self, default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("SQL", "SECURITY"):
        is_definer = self._match_text_seq("DEFINER")
        return self.expression(exp.SqlSecurityProperty, definer=is_definer)

    start_index = self._index
    name = self._parse_column()

    if not self._match(TokenType.EQ):
        # Not an assignment, so rewind and try the sequence options instead
        self._retreat(start_index)
        return self._parse_sequence_properties()

    # Dotted identifiers wrapped in exp.Column become exp.Dot, plain ones become exp.Var
    if isinstance(name, exp.Column):
        name = exp.var(name.name) if len(name.parts) <= 1 else name.to_dot()

    assigned = self._parse_bitwise() or self._parse_var(any_token=True)

    # A value parsed as exp.Column(exp.Identifier()) is turned into an exp.Var
    if isinstance(assigned, exp.Column):
        assigned = exp.var(assigned.name)

    return self.expression(exp.Property, this=name, value=assigned)
def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
    """
    Parses consecutive properties until one fails to parse.

    Args:
        before: Whether to use `_parse_property_before` instead of `_parse_property`.

    Returns:
        A Properties node wrapping everything that was parsed, or None if nothing was.
    """
    parse_one = self._parse_property_before if before else self._parse_property

    collected = []
    while True:
        parsed = parse_one()
        if not parsed:
            break

        # A single parse may yield either one property or a list of them
        collected.extend(ensure_list(parsed))

    if not collected:
        return None

    return self.expression(exp.Properties, expressions=collected)
def _parse_system_versioning_property(
    self, with_: bool = False
) -> exp.WithSystemVersioningProperty:
    """
    Parses the value of a SYSTEM_VERSIONING property: `[=] OFF` or `[=] [ON] [(<options>)]`,
    where the options are HISTORY_TABLE, DATA_CONSISTENCY_CHECK and HISTORY_RETENTION_PERIOD.

    Args:
        with_: Value stored under the property's "with" arg.

    Returns:
        The parsed WithSystemVersioningProperty expression.
    """
    self._match(TokenType.EQ)
    prop = self.expression(
        exp.WithSystemVersioningProperty,
        **{  # type: ignore
            "on": True,
            "with": with_,
        },
    )

    if self._match_text_seq("OFF"):
        prop.set("on", False)
        return prop

    self._match(TokenType.ON)
    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            index = self._index

            if self._match_text_seq("HISTORY_TABLE", "="):
                prop.set("this", self._parse_table_parts())
            elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
            elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            # Nothing was consumed, i.e. the option is unrecognized. Bail out, otherwise the
            # loop would spin forever on the same token; the caller deals with the leftovers.
            if self._index == index:
                break

    return prop
def _parse_distributed_property(self) -> exp.DistributedByProperty:
    """Parses `[BY HASH (<ids>) | BY RANDOM] [BUCKETS <n> | BUCKETS AUTO] [<order>]`."""
    hash_columns: t.Optional[t.List[exp.Expression]] = None
    distribution = "HASH"

    if self._match_text_seq("BY", "HASH"):
        hash_columns = self._parse_wrapped_csv(self._parse_id_var)
    elif self._match_text_seq("BY", "RANDOM"):
        distribution = "RANDOM"

    # If the BUCKETS keyword is not present, the number of buckets is AUTO
    explicit_buckets = self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO")
    bucket_count: t.Optional[exp.Expression] = (
        self._parse_number() if explicit_buckets else None
    )

    ordering = self._parse_order()

    return self.expression(
        exp.DistributedByProperty,
        expressions=hash_columns,
        kind=distribution,
        buckets=bucket_count,
        order=ordering,
    )
# https://dev.mysql.com/doc/refman/8.0/en/create-view.html
def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
    """
    Parses a `[=] <user> [<parameter token>] <host>` definer specification.

    Returns:
        A DefinerProperty holding "<user>@<host>", or None if either part is missing.
    """
    self._match(TokenType.EQ)

    user = self._parse_id_var()
    self._match(TokenType.PARAMETER)

    # The host is either an identifier or a lone MOD token, whose text is used as-is
    host = self._parse_id_var()
    if not host and self._match(TokenType.MOD):
        host = self._prev.text

    if user and host:
        return exp.DefinerProperty(this=f"{user}@{host}")

    return None
def _parse_clustered_by(self) -> exp.ClusteredByProperty:
    """Parses `[BY] (<columns>) [SORTED BY (<ordered>)] [INTO] <number> [BUCKETS]`."""

    def _parse_parenthesized(parse_item: t.Callable) -> t.List:
        # Both lists share the same shape: "(" <comma-separated items> ")"
        self._match_l_paren()
        items = self._parse_csv(parse_item)
        self._match_r_paren()
        return items

    self._match_text_seq("BY")
    columns = _parse_parenthesized(self._parse_column)

    sort_keys = None
    if self._match_text_seq("SORTED", "BY"):
        sort_keys = _parse_parenthesized(self._parse_ordered)

    self._match(TokenType.INTO)
    bucket_count = self._parse_number()
    self._match_text_seq("BUCKETS")

    return self.expression(
        exp.ClusteredByProperty,
        expressions=columns,
        sorted_by=sort_keys,
        buckets=bucket_count,
    )
def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
    """Build a BlockCompressionProperty from an optional `=` and the mode keywords that follow."""
    self._match(TokenType.EQ)

    # Probe each mode keyword once, in this fixed order, and keep the result of every probe
    modes = {
        arg_name: self._match_text_seq(keyword)
        for arg_name, keyword in (
            ("always", "ALWAYS"),
            ("manual", "MANUAL"),
            ("never", "NEVER"),
            ("default", "DEFAULT"),
        )
    }

    # AUTOTEMP is followed by a schema; without the keyword the arg stays None
    autotemp = self._parse_schema() if self._match_text_seq("AUTOTEMP") else None

    return self.expression(exp.BlockCompressionProperty, **modes, autotemp=autotemp)
def _parse_partition_by(self) -> t.List[exp.Expression]:
    """Return the comma-separated expressions after PARTITION BY, or [] when the token is absent."""
    has_partition_by = self._match(TokenType.PARTITION_BY)
    if not has_partition_by:
        return []

    return self._parse_csv(self._parse_assignment)
self._parse_wrapped_csv(_parse_partition_bound_expr) 2621 elif self._match_text_seq("WITH", "(", "MODULUS"): 2622 this = self._parse_number() 2623 self._match_text_seq(",", "REMAINDER") 2624 expression = self._parse_number() 2625 self._match_r_paren() 2626 else: 2627 self.raise_error("Failed to parse partition bound spec.") 2628 2629 return self.expression( 2630 exp.PartitionBoundSpec, 2631 this=this, 2632 expression=expression, 2633 from_expressions=from_expressions, 2634 to_expressions=to_expressions, 2635 ) 2636 2637 # https://www.postgresql.org/docs/current/sql-createtable.html 2638 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2639 if not self._match_text_seq("OF"): 2640 self._retreat(self._index - 1) 2641 return None 2642 2643 this = self._parse_table(schema=True) 2644 2645 if self._match(TokenType.DEFAULT): 2646 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2647 elif self._match_text_seq("FOR", "VALUES"): 2648 expression = self._parse_partition_bound_spec() 2649 else: 2650 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2651 2652 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2653 2654 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2655 self._match(TokenType.EQ) 2656 return self.expression( 2657 exp.PartitionedByProperty, 2658 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2659 ) 2660 2661 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2662 if self._match_text_seq("AND", "STATISTICS"): 2663 statistics = True 2664 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2665 statistics = False 2666 else: 2667 statistics = None 2668 2669 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2670 2671 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2672 if self._match_text_seq("SQL"): 2673 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS 
SQL") 2674 return None 2675 2676 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2677 if self._match_text_seq("SQL", "DATA"): 2678 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2679 return None 2680 2681 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2682 if self._match_text_seq("PRIMARY", "INDEX"): 2683 return exp.NoPrimaryIndexProperty() 2684 if self._match_text_seq("SQL"): 2685 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2686 return None 2687 2688 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2689 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2690 return exp.OnCommitProperty() 2691 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2692 return exp.OnCommitProperty(delete=True) 2693 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2694 2695 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2696 if self._match_text_seq("SQL", "DATA"): 2697 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2698 return None 2699 2700 def _parse_distkey(self) -> exp.DistKeyProperty: 2701 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2702 2703 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2704 table = self._parse_table(schema=True) 2705 2706 options = [] 2707 while self._match_texts(("INCLUDING", "EXCLUDING")): 2708 this = self._prev.text.upper() 2709 2710 id_var = self._parse_id_var() 2711 if not id_var: 2712 return None 2713 2714 options.append( 2715 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2716 ) 2717 2718 return self.expression(exp.LikeProperty, this=table, expressions=options) 2719 2720 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2721 return self.expression( 2722 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2723 ) 2724 2725 def 
def _parse_returns(self) -> exp.ReturnsProperty:
    """Parse what follows RETURNS: a TABLE form, NULL ON NULL INPUT, or a type."""
    returned: t.Optional[exp.Expression]
    returns_null = None
    is_table = self._match(TokenType.TABLE)

    if not is_table:
        if self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            returns_null = True
            returned = None
        else:
            returned = self._parse_types()
    elif not self._match(TokenType.LT):
        # TABLE without `<`: parse a schema hanging off a TABLE var
        returned = self._parse_schema(exp.var("TABLE"))
    else:
        # TABLE<...>: struct-style members, which must be closed by `>`
        members = self._parse_csv(self._parse_struct_types)
        returned = self.expression(exp.Schema, this="TABLE", expressions=members)
        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")

    return self.expression(
        exp.ReturnsProperty, this=returned, is_table=is_table, null=returns_null
    )
def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
    """Parse a run of `[WHEN cond THEN] [ELSE] INTO table [values]` items followed by a source table.

    The upper-cased text of the previously consumed token is stored as `kind`.
    """
    kind = self._prev.text.upper()

    def _next_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
        # Optional guard: WHEN <condition> [THEN]
        condition = None
        if self._match(TokenType.WHEN):
            condition = self._parse_disjunction()
            self._match(TokenType.THEN)

        is_else = self._match(TokenType.ELSE)

        # Without INTO there is no further insert target
        if not self._match(TokenType.INTO):
            return None

        target = self._parse_table(schema=True)
        values = self._parse_derived_table_values()
        insert = self.expression(exp.Insert, this=target, expression=values)

        return self.expression(
            exp.ConditionalInsert, this=insert, expression=condition, else_=is_else
        )

    inserts = []
    while True:
        conditional_insert = _next_conditional_insert()
        if conditional_insert is None:
            break
        inserts.append(conditional_insert)

    return self.expression(
        exp.MultitableInserts,
        kind=kind,
        comments=comments,
        expressions=inserts,
        source=self._parse_table(),
    )
def _parse_kill(self) -> exp.Kill:
    """Parse an optional CONNECTION/QUERY keyword and then a primary expression into a Kill node."""
    if self._match_texts(("CONNECTION", "QUERY")):
        # Keep the keyword text exactly as it was written
        kind = exp.var(self._prev.text)
    else:
        kind = None

    target = self._parse_primary()
    return self.expression(exp.Kill, this=target, kind=kind)
def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
    """Parse `[WITH] SERDEPROPERTIES (...)`, restoring the parser position when it is absent.

    Args:
        with_: when truthy, WITH is treated as already seen and is not matched here.
    """
    start = self._index
    has_with = with_ or self._match_text_seq("WITH")

    if self._match(TokenType.SERDE_PROPERTIES):
        # "with" is a keyword, so the args go through a dict rather than keyword syntax
        serde_args = {
            "expressions": self._parse_wrapped_properties(),
            "with": has_with,
        }
        return self.expression(exp.SerdeProperties, **serde_args)  # type: ignore

    # Undo a possibly consumed WITH
    self._retreat(start)
    return None
serde_properties = self._parse_serde_properties() 2970 2971 return self.expression( 2972 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2973 ) 2974 2975 self._match_text_seq("DELIMITED") 2976 2977 kwargs = {} 2978 2979 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2980 kwargs["fields"] = self._parse_string() 2981 if self._match_text_seq("ESCAPED", "BY"): 2982 kwargs["escaped"] = self._parse_string() 2983 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2984 kwargs["collection_items"] = self._parse_string() 2985 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2986 kwargs["map_keys"] = self._parse_string() 2987 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2988 kwargs["lines"] = self._parse_string() 2989 if self._match_text_seq("NULL", "DEFINED", "AS"): 2990 kwargs["null"] = self._parse_string() 2991 2992 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2993 2994 def _parse_load(self) -> exp.LoadData | exp.Command: 2995 if self._match_text_seq("DATA"): 2996 local = self._match_text_seq("LOCAL") 2997 self._match_text_seq("INPATH") 2998 inpath = self._parse_string() 2999 overwrite = self._match(TokenType.OVERWRITE) 3000 self._match_pair(TokenType.INTO, TokenType.TABLE) 3001 3002 return self.expression( 3003 exp.LoadData, 3004 this=self._parse_table(schema=True), 3005 local=local, 3006 overwrite=overwrite, 3007 inpath=inpath, 3008 partition=self._parse_partition(), 3009 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3010 serde=self._match_text_seq("SERDE") and self._parse_string(), 3011 ) 3012 return self._parse_as_command(self._prev) 3013 3014 def _parse_delete(self) -> exp.Delete: 3015 # This handles MySQL's "Multiple-Table Syntax" 3016 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3017 tables = None 3018 if not self._match(TokenType.FROM, advance=False): 3019 tables = self._parse_csv(self._parse_table) or None 3020 3021 
def _parse_update(self) -> exp.Update:
    """Parse the target table, optional SET assignments and trailing clauses into an Update node."""
    target = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
    assignments = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

    # RETURNING is tried here first and once more after FROM/WHERE if this attempt finds nothing
    early_returning = self._parse_returning()

    # "from" is a keyword, so the args are collected in a dict; clauses are parsed in this order
    update_args = {"this": target, "expressions": assignments}
    update_args["from"] = self._parse_from(joins=True)
    update_args["where"] = self._parse_where()
    update_args["returning"] = early_returning or self._parse_returning()
    update_args["order"] = self._parse_order()
    update_args["limit"] = self._parse_limit()

    return self.expression(exp.Update, **update_args)  # type: ignore
3082 exp.Cache, 3083 this=table, 3084 lazy=lazy, 3085 options=options, 3086 expression=self._parse_select(nested=True), 3087 ) 3088 3089 def _parse_partition(self) -> t.Optional[exp.Partition]: 3090 if not self._match_texts(self.PARTITION_KEYWORDS): 3091 return None 3092 3093 return self.expression( 3094 exp.Partition, 3095 subpartition=self._prev.text.upper() == "SUBPARTITION", 3096 expressions=self._parse_wrapped_csv(self._parse_assignment), 3097 ) 3098 3099 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3100 def _parse_value_expression() -> t.Optional[exp.Expression]: 3101 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3102 return exp.var(self._prev.text.upper()) 3103 return self._parse_expression() 3104 3105 if self._match(TokenType.L_PAREN): 3106 expressions = self._parse_csv(_parse_value_expression) 3107 self._match_r_paren() 3108 return self.expression(exp.Tuple, expressions=expressions) 3109 3110 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
3111 expression = self._parse_expression() 3112 if expression: 3113 return self.expression(exp.Tuple, expressions=[expression]) 3114 return None 3115 3116 def _parse_projections(self) -> t.List[exp.Expression]: 3117 return self._parse_expressions() 3118 3119 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3120 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3121 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3122 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3123 ) 3124 elif self._match(TokenType.FROM): 3125 from_ = self._parse_from(skip_from_token=True) 3126 # Support parentheses for duckdb FROM-first syntax 3127 select = self._parse_select() 3128 if select: 3129 select.set("from", from_) 3130 this = select 3131 else: 3132 this = exp.select("*").from_(t.cast(exp.From, from_)) 3133 else: 3134 this = ( 3135 self._parse_table() 3136 if table 3137 else self._parse_select(nested=True, parse_set_operation=False) 3138 ) 3139 3140 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3141 # in case a modifier (e.g. 
def _parse_select(
    self,
    nested: bool = False,
    table: bool = False,
    parse_subquery_alias: bool = True,
    parse_set_operation: bool = True,
) -> t.Optional[exp.Expression]:
    """Parse a query-like construct starting at the current token.

    The branches are tried in order: a leading WITH, SELECT, a parenthesised query,
    VALUES, a bare leading FROM, SUMMARIZE, DESCRIBE and STREAM.

    Args:
        nested: together with `table`, enables the parenthesised-query branch.
        table: enables the parenthesised-query branch and is forwarded to
            `_parse_wrapped_select`.
        parse_subquery_alias: forwarded as `parse_alias` to `_parse_subquery`.
        parse_set_operation: whether the result is passed through `_parse_set_operations`
            on the final return.

    Returns:
        The parsed expression. The WITH, parenthesised and SUMMARIZE branches return
        early, without the final `_parse_set_operations` step.
    """
    cte = self._parse_with()

    if cte:
        this = self._parse_statement()

        if not this:
            self.raise_error("Failed to parse any statement following CTE")
            # Reached only when raise_error returns instead of raising
            return cte

        # Attach the CTE only to expressions that declare a "with" arg
        if "with" in this.arg_types:
            this.set("with", cte)
        else:
            self.raise_error(f"{this.key} does not support CTE")
            this = cte

        return this

    # duckdb supports leading with FROM x
    from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

    if self._match(TokenType.SELECT):
        comments = self._prev_comments

        hint = self._parse_hint()

        # ALL / DISTINCT are only matched when the token after the current one is not a dot
        if self._next and not self._next.token_type == TokenType.DOT:
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)
        else:
            all_, distinct = None, None

        # `AS STRUCT` / `AS VALUE` yields the upper-cased keyword; otherwise a falsy value
        kind = (
            self._match(TokenType.ALIAS)
            and self._match_texts(("STRUCT", "VALUE"))
            and self._prev.text.upper()
        )

        if distinct:
            distinct = self.expression(
                exp.Distinct,
                on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
            )

        if all_ and distinct:
            self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

        # Collect consecutive operation modifiers as upper-cased vars
        operation_modifiers = []
        while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
            operation_modifiers.append(exp.var(self._prev.text.upper()))

        limit = self._parse_limit(top=True)
        projections = self._parse_projections()

        this = self.expression(
            exp.Select,
            kind=kind,
            hint=hint,
            distinct=distinct,
            expressions=projections,
            limit=limit,
            operation_modifiers=operation_modifiers or None,
        )
        this.comments = comments

        into = self._parse_into()
        if into:
            this.set("into", into)

        # A FROM parsed before SELECT takes precedence over one that follows the projections
        if not from_:
            from_ = self._parse_from()

        if from_:
            this.set("from", from_)

        this = self._parse_query_modifiers(this)
    elif (table or nested) and self._match(TokenType.L_PAREN):
        this = self._parse_wrapped_select(table=table)

        # We return early here so that the UNION isn't attached to the subquery by the
        # following call to _parse_set_operations, but instead becomes the parent node
        self._match_r_paren()
        return self._parse_subquery(this, parse_alias=parse_subquery_alias)
    elif self._match(TokenType.VALUES, advance=False):
        this = self._parse_derived_table_values()
    elif from_:
        # A leading FROM with no SELECT becomes SELECT * FROM <that table>
        this = exp.select("*").from_(from_.this, copy=False)
    elif self._match(TokenType.SUMMARIZE):
        # Note: this rebinds the `table` parameter to the result of matching TABLE
        table = self._match(TokenType.TABLE)
        this = self._parse_select() or self._parse_string() or self._parse_table()
        return self.expression(exp.Summarize, this=this, table=table)
    elif self._match(TokenType.DESCRIBE):
        this = self._parse_describe()
    elif self._match_text_seq("STREAM"):
        this = self._parse_function()
        if this:
            this = self.expression(exp.Stream, this=this)
        else:
            # No function followed STREAM: step back one token so it is not consumed
            self._retreat(self._index - 1)
    else:
        this = None

    return self._parse_set_operations(this) if parse_set_operation else this
def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
    """Parse a WITH clause: an optional RECURSIVE marker followed by a list of CTEs.

    Args:
        skip_with_token: when truthy, the WITH token is not matched here.

    Returns:
        The With expression, or None when WITH had to be matched and was not found.
    """
    if not (skip_with_token or self._match(TokenType.WITH)):
        return None

    with_comments = self._prev_comments
    is_recursive = self._match(TokenType.RECURSIVE)

    pending_comments = None
    ctes = []
    while True:
        cte = self._parse_cte()
        if isinstance(cte, exp.CTE):
            ctes.append(cte)
            # Comments captured at the preceding separator are attached to this CTE
            if pending_comments:
                cte.add_comments(pending_comments)

        if self._match(TokenType.COMMA) or self._match(TokenType.WITH):
            # Either separator may additionally be followed by a WITH token
            self._match(TokenType.WITH)
        else:
            break

        pending_comments = self._prev_comments

    return self.expression(
        exp.With,
        comments=with_comments,
        expressions=ctes,
        recursive=is_recursive,
        search=self._parse_recursive_with_search(),
    )
cte 3341 3342 def _parse_table_alias( 3343 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3344 ) -> t.Optional[exp.TableAlias]: 3345 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3346 # so this section tries to parse the clause version and if it fails, it treats the token 3347 # as an identifier (alias) 3348 if self._can_parse_limit_or_offset(): 3349 return None 3350 3351 any_token = self._match(TokenType.ALIAS) 3352 alias = ( 3353 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3354 or self._parse_string_as_identifier() 3355 ) 3356 3357 index = self._index 3358 if self._match(TokenType.L_PAREN): 3359 columns = self._parse_csv(self._parse_function_parameter) 3360 self._match_r_paren() if columns else self._retreat(index) 3361 else: 3362 columns = None 3363 3364 if not alias and not columns: 3365 return None 3366 3367 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3368 3369 # We bubble up comments from the Identifier to the TableAlias 3370 if isinstance(alias, exp.Identifier): 3371 table_alias.add_comments(alias.pop_comments()) 3372 3373 return table_alias 3374 3375 def _parse_subquery( 3376 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3377 ) -> t.Optional[exp.Subquery]: 3378 if not this: 3379 return None 3380 3381 return self.expression( 3382 exp.Subquery, 3383 this=this, 3384 pivots=self._parse_pivots(), 3385 alias=self._parse_table_alias() if parse_alias else None, 3386 sample=self._parse_table_sample(), 3387 ) 3388 3389 def _implicit_unnests_to_explicit(self, this: E) -> E: 3390 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3391 3392 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3393 for i, join in enumerate(this.args.get("joins") or []): 3394 table = join.this 3395 normalized_table = table.copy() 3396 normalized_table.meta["maybe_column"] = True 
3397 normalized_table = _norm(normalized_table, dialect=self.dialect) 3398 3399 if isinstance(table, exp.Table) and not join.args.get("on"): 3400 if normalized_table.parts[0].name in refs: 3401 table_as_column = table.to_column() 3402 unnest = exp.Unnest(expressions=[table_as_column]) 3403 3404 # Table.to_column creates a parent Alias node that we want to convert to 3405 # a TableAlias and attach to the Unnest, so it matches the parser's output 3406 if isinstance(table.args.get("alias"), exp.TableAlias): 3407 table_as_column.replace(table_as_column.this) 3408 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3409 3410 table.replace(unnest) 3411 3412 refs.add(normalized_table.alias_or_name) 3413 3414 return this 3415 3416 def _parse_query_modifiers( 3417 self, this: t.Optional[exp.Expression] 3418 ) -> t.Optional[exp.Expression]: 3419 if isinstance(this, self.MODIFIABLES): 3420 for join in self._parse_joins(): 3421 this.append("joins", join) 3422 for lateral in iter(self._parse_lateral, None): 3423 this.append("laterals", lateral) 3424 3425 while True: 3426 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3427 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3428 key, expression = parser(self) 3429 3430 if expression: 3431 this.set(key, expression) 3432 if key == "limit": 3433 offset = expression.args.pop("offset", None) 3434 3435 if offset: 3436 offset = exp.Offset(expression=offset) 3437 this.set("offset", offset) 3438 3439 limit_by_expressions = expression.expressions 3440 expression.set("expressions", None) 3441 offset.set("expressions", limit_by_expressions) 3442 continue 3443 break 3444 3445 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3446 this = self._implicit_unnests_to_explicit(this) 3447 3448 return this 3449 3450 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3451 start = self._curr 3452 while self._curr: 3453 self._advance() 3454 3455 end = self._tokens[self._index 
- 1] 3456 return exp.Hint(expressions=[self._find_sql(start, end)]) 3457 3458 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3459 return self._parse_function_call() 3460 3461 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3462 start_index = self._index 3463 should_fallback_to_string = False 3464 3465 hints = [] 3466 try: 3467 for hint in iter( 3468 lambda: self._parse_csv( 3469 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3470 ), 3471 [], 3472 ): 3473 hints.extend(hint) 3474 except ParseError: 3475 should_fallback_to_string = True 3476 3477 if should_fallback_to_string or self._curr: 3478 self._retreat(start_index) 3479 return self._parse_hint_fallback_to_string() 3480 3481 return self.expression(exp.Hint, expressions=hints) 3482 3483 def _parse_hint(self) -> t.Optional[exp.Hint]: 3484 if self._match(TokenType.HINT) and self._prev_comments: 3485 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3486 3487 return None 3488 3489 def _parse_into(self) -> t.Optional[exp.Into]: 3490 if not self._match(TokenType.INTO): 3491 return None 3492 3493 temp = self._match(TokenType.TEMPORARY) 3494 unlogged = self._match_text_seq("UNLOGGED") 3495 self._match(TokenType.TABLE) 3496 3497 return self.expression( 3498 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3499 ) 3500 3501 def _parse_from( 3502 self, joins: bool = False, skip_from_token: bool = False 3503 ) -> t.Optional[exp.From]: 3504 if not skip_from_token and not self._match(TokenType.FROM): 3505 return None 3506 3507 return self.expression( 3508 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3509 ) 3510 3511 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3512 return self.expression( 3513 exp.MatchRecognizeMeasure, 3514 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3515 this=self._parse_expression(), 3516 ) 3517 3518 
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a `MATCH_RECOGNIZE (...)` clause into an `exp.MatchRecognize`.

    Sub-clauses are consumed in a fixed order: PARTITION BY, ORDER BY, MEASURES,
    the rows-per-match option, AFTER MATCH SKIP, PATTERN, DEFINE, the closing
    parenthesis and finally an optional table alias. Returns None when the next
    token is not MATCH_RECOGNIZE.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = None
    if self._match_text_seq("MEASURES"):
        measures = self._parse_csv(self._parse_match_recognize_measure)

    rows = None
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        rows_text = "ALL ROWS PER MATCH"
        # At most one modifier is consumed; they are tried in this order.
        for words, suffix in (
            (("SHOW", "EMPTY", "MATCHES"), " SHOW EMPTY MATCHES"),
            (("OMIT", "EMPTY", "MATCHES"), " OMIT EMPTY MATCHES"),
            (("WITH", "UNMATCHED", "ROWS"), " WITH UNMATCHED ROWS"),
        ):
            if self._match_text_seq(*words):
                rows_text = rows_text + suffix
                break
        rows = exp.var(rows_text)

    after = None
    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        skip_text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            skip_text = skip_text + " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            skip_text = skip_text + " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            skip_text = skip_text + f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            skip_text = skip_text + f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(skip_text)

    pattern = None
    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern is kept as raw SQL text: scan forward, tracking the
        # parenthesis depth, until the paren opened above is balanced.
        depth = 1
        first_token = self._curr

        while self._curr and depth > 0:
            token_type = self._curr.token_type
            if token_type == TokenType.L_PAREN:
                depth += 1
            if token_type == TokenType.R_PAREN:
                depth -= 1

            # Recorded before advancing, so after the loop it is the token
            # just before the balancing ")".
            last_token = self._prev
            self._advance()

        if depth > 0:
            self.raise_error("Expecting )", self._curr)

        pattern = exp.var(self._find_sql(first_token, last_token))

    define = None
    if self._match_text_seq("DEFINE"):
        define = self._parse_csv(self._parse_name_as_expression)

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize,
        partition_by=partition,
        order=order,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=self._parse_table_alias(),
    )
def _parse_lateral(self) -> t.Optional[exp.Lateral]:
    """Parse `CROSS APPLY`, `OUTER APPLY` or `LATERAL [VIEW] [OUTER]` into an `exp.Lateral`.

    Returns None when none of these introducers is present.
    """
    # Keeps whatever `_match_pair` returned for CROSS APPLY, unless OUTER APPLY
    # matched instead, in which case it is forced to False. The `is not None`
    # check below distinguishes "some APPLY form matched" from "neither did".
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
    if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
        cross_apply = False

    view = None
    outer = None
    if cross_apply is not None:
        this = self._parse_select(table=True)
    elif self._match(TokenType.LATERAL):
        this = self._parse_select(table=True)
        view = self._match(TokenType.VIEW)
        outer = self._match(TokenType.OUTER)
    else:
        return None

    if not this:
        # Not a subquery: try UNNEST, then a function, then a plain identifier.
        this = self._parse_unnest()
        if not this:
            this = self._parse_function()
        if not this:
            this = self._parse_id_var(any_token=False)

        while self._match(TokenType.DOT):
            member = self._parse_function() or self._parse_id_var(any_token=False)
            this = exp.Dot(this=this, expression=member)

    ordinality: t.Optional[bool] = None
    table_alias: t.Optional[exp.TableAlias]

    if view:
        alias_name = self._parse_id_var(any_token=False)
        if self._match(TokenType.ALIAS):
            alias_columns = self._parse_csv(self._parse_id_var)
        else:
            alias_columns = []
        table_alias = self.expression(exp.TableAlias, this=alias_name, columns=alias_columns)
    elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
        # We move the alias from the lateral's child node to the lateral itself
        table_alias = this.args["alias"].pop()
    else:
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        table_alias = self._parse_table_alias()

    return self.expression(
        exp.Lateral,
        this=this,
        view=view,
        outer=outer,
        alias=table_alias,
        cross_apply=cross_apply,
        ordinality=ordinality,
    )
def _parse_join_parts(
    self,
) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
    """Consume the optional join method, side and kind tokens, in that order.

    Each tuple slot holds the matched token (`self._prev`), or the falsy result
    of `_match_set` when nothing from the corresponding set was matched.
    """
    return (
        self._match_set(self.JOIN_METHODS) and self._prev,
        self._match_set(self.JOIN_SIDES) and self._prev,
        self._match_set(self.JOIN_KINDS) and self._prev,
    )


def _parse_using_identifiers(self) -> t.List[exp.Expression]:
    """Parse the wrapped, comma-separated column list of a USING clause.

    Each entry is parsed as a column; when the result is an `exp.Column`, its
    inner `this` node is returned instead of the column itself.
    """

    def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
        this = self._parse_column()
        if isinstance(this, exp.Column):
            return this.this
        return this

    return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)


def _parse_join(
    self, skip_join_token: bool = False, parse_bracket: bool = False
) -> t.Optional[exp.Join]:
    """Parse a single join: a comma join, a `[method] [side] [kind] JOIN`, or an APPLY.

    Args:
        skip_join_token: When True, the JOIN keyword is not required.
        parse_bracket: Forwarded to `_parse_table` for the joined table(s).

    Returns:
        An `exp.Join`, or None when no join starts at the current position.
    """
    # Comma join: the table is parsed speculatively via `_try_parse`.
    if self._match(TokenType.COMMA):
        table = self._try_parse(self._parse_table)
        if table:
            return self.expression(exp.Join, this=table)
        return None

    index = self._index
    method, side, kind = self._parse_join_parts()
    hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
    # A STRAIGHT_JOIN kind token counts as the join keyword itself.
    join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

    if not skip_join_token and not join:
        # The consumed modifiers did not introduce a join: rewind and drop them.
        self._retreat(index)
        kind = None
        method = None
        side = None

    # NOTE(review): the third positional argument is presumably `advance=False`,
    # i.e. a pure lookahead that leaves the tokens for `_parse_table` -- confirm
    # against `_match_pair`.
    outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

    if not skip_join_token and not join and not outer_apply and not cross_apply:
        return None

    kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
    # An ARRAY-kind join may list further comma-separated tables.
    if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
        kwargs["expressions"] = self._parse_csv(
            lambda: self._parse_table(parse_bracket=parse_bracket)
        )

    if method:
        kwargs["method"] = method.text
    if side:
        kwargs["side"] = side.text
    if kind:
        kwargs["kind"] = kind.text
    if hint:
        kwargs["hint"] = hint

    if self._match(TokenType.MATCH_CONDITION):
        kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

    if self._match(TokenType.ON):
        kwargs["on"] = self._parse_assignment()
    elif self._match(TokenType.USING):
        kwargs["using"] = self._parse_using_identifiers()
    elif (
        not (outer_apply or cross_apply)
        and not isinstance(kwargs["this"], exp.Unnest)
        and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
    ):
        # No ON/USING directly after the table: speculatively parse nested joins
        # and look for an ON/USING after them that belongs to this join.
        index = self._index
        joins: t.Optional[list] = list(self._parse_joins())

        if joins and self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif joins and self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        else:
            # Nothing followed the nested joins: undo the speculative parse.
            joins = None
            self._retreat(index)

        kwargs["this"].set("joins", joins if joins else None)

    kwargs["pivots"] = self._parse_pivots()

    # Carry over comments attached to the method/side/kind tokens.
    comments = [c for token in (method, side, kind) if token for c in token.comments]
    return self.expression(exp.Join, comments=comments, **kwargs)
3707 lambda: self._parse_table(parse_bracket=parse_bracket) 3708 ) 3709 3710 if method: 3711 kwargs["method"] = method.text 3712 if side: 3713 kwargs["side"] = side.text 3714 if kind: 3715 kwargs["kind"] = kind.text 3716 if hint: 3717 kwargs["hint"] = hint 3718 3719 if self._match(TokenType.MATCH_CONDITION): 3720 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3721 3722 if self._match(TokenType.ON): 3723 kwargs["on"] = self._parse_assignment() 3724 elif self._match(TokenType.USING): 3725 kwargs["using"] = self._parse_using_identifiers() 3726 elif ( 3727 not (outer_apply or cross_apply) 3728 and not isinstance(kwargs["this"], exp.Unnest) 3729 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3730 ): 3731 index = self._index 3732 joins: t.Optional[list] = list(self._parse_joins()) 3733 3734 if joins and self._match(TokenType.ON): 3735 kwargs["on"] = self._parse_assignment() 3736 elif joins and self._match(TokenType.USING): 3737 kwargs["using"] = self._parse_using_identifiers() 3738 else: 3739 joins = None 3740 self._retreat(index) 3741 3742 kwargs["this"].set("joins", joins if joins else None) 3743 3744 kwargs["pivots"] = self._parse_pivots() 3745 3746 comments = [c for token in (method, side, kind) if token for c in token.comments] 3747 return self.expression(exp.Join, comments=comments, **kwargs) 3748 3749 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3750 this = self._parse_assignment() 3751 3752 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3753 return this 3754 3755 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3756 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3757 3758 return this 3759 3760 def _parse_index_params(self) -> exp.IndexParameters: 3761 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3762 3763 if self._match(TokenType.L_PAREN, advance=False): 3764 columns = 
def _parse_index(
    self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
) -> t.Optional[exp.Index]:
    """Parse an index definition.

    Args:
        index: An already-parsed index name. When given (or when `anonymous` is
            True), only `[ON] [TABLE] <table>` and the index parameters are parsed.
        anonymous: Parse an index that has no name.

    Returns:
        An `exp.Index`, or None when a named index was expected but the INDEX
        keyword is missing.
    """
    table = None
    unique = primary = amp = None

    if index or anonymous:
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive
        table = self._parse_table_parts(schema=True)
    else:
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")

        if not self._match(TokenType.INDEX):
            return None

        index = self._parse_id_var()

    return self.expression(
        exp.Index,
        this=index,
        table=table,
        unique=unique,
        primary=primary,
        amp=amp,
        params=self._parse_index_params(),
    )


def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
    """Parse table hints: a `WITH (...)` list or a run of index hints.

    Returns:
        The list of parsed hints, or None when there are none.
    """
    hints: t.List[exp.Expression] = []

    if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
        # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
        def _with_hint_entry() -> t.Optional[exp.Expression]:
            return self._parse_function() or self._parse_var(any_token=True)

        with_hint = self.expression(
            exp.WithTableHint, expressions=self._parse_csv(_with_hint_entry)
        )
        hints.append(with_hint)
        self._match_r_paren()
        return hints or None

    # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
    while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
        index_hint = exp.IndexTableHint(this=self._prev.text.upper())

        self._match_set((TokenType.INDEX, TokenType.KEY))
        if self._match(TokenType.FOR):
            index_hint.set("target", self._advance_any() and self._prev.text.upper())

        index_hint.set("expressions", self._parse_wrapped_id_vars())
        hints.append(index_hint)

    return hints or None
expressions=self._parse_csv( 3834 lambda: self._parse_function() or self._parse_var(any_token=True) 3835 ), 3836 ) 3837 ) 3838 self._match_r_paren() 3839 else: 3840 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3841 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3842 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3843 3844 self._match_set((TokenType.INDEX, TokenType.KEY)) 3845 if self._match(TokenType.FOR): 3846 hint.set("target", self._advance_any() and self._prev.text.upper()) 3847 3848 hint.set("expressions", self._parse_wrapped_id_vars()) 3849 hints.append(hint) 3850 3851 return hints or None 3852 3853 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3854 return ( 3855 (not schema and self._parse_function(optional_parens=False)) 3856 or self._parse_id_var(any_token=False) 3857 or self._parse_string_as_identifier() 3858 or self._parse_placeholder() 3859 ) 3860 3861 def _parse_table_parts( 3862 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3863 ) -> exp.Table: 3864 catalog = None 3865 db = None 3866 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3867 3868 while self._match(TokenType.DOT): 3869 if catalog: 3870 # This allows nesting the table in arbitrarily many dot expressions if needed 3871 table = self.expression( 3872 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3873 ) 3874 else: 3875 catalog = db 3876 db = table 3877 # "" used for tsql FROM a..b case 3878 table = self._parse_table_part(schema=schema) or "" 3879 3880 if ( 3881 wildcard 3882 and self._is_connected() 3883 and (isinstance(table, exp.Identifier) or not table) 3884 and self._match(TokenType.STAR) 3885 ): 3886 if isinstance(table, exp.Identifier): 3887 table.args["this"] += "*" 3888 else: 3889 table = exp.Identifier(this="*") 3890 3891 # We bubble up comments from the Identifier to the Table 3892 comments = table.pop_comments() if isinstance(table, 
def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    parse_bracket: bool = False,
    is_db_reference: bool = False,
    parse_partition: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a table-like source and its trailing modifiers.

    A lateral, UNNEST, derived VALUES or subquery is returned as soon as one is
    found. Otherwise a bracket expression, `ROWS FROM (...)` or a (qualified)
    table name is parsed, followed by its optional modifiers.

    Args:
        schema: Parse the name as a schema target; the result of `_parse_schema`
            is returned right after the optional partition clause.
        joins: Also consume any joins that follow and append them to the table.
        alias_tokens: Token types allowed as a table alias; defaults to
            `TABLE_ALIAS_TOKENS`.
        parse_bracket: Try a bracket expression before a table name.
        is_db_reference: Forwarded to `_parse_table_parts`.
        parse_partition: Allow a PARTITION clause even when
            `SUPPORTS_PARTITION_SELECTION` is falsy.
    """
    lateral = self._parse_lateral()
    if lateral:
        return lateral

    unnest = self._parse_unnest()
    if unnest:
        return unnest

    values = self._parse_derived_table_values()
    if values:
        return values

    subquery = self._parse_select(table=True)
    if subquery:
        if not subquery.args.get("pivots"):
            subquery.set("pivots", self._parse_pivots())
        return subquery

    bracket = parse_bracket and self._parse_bracket(None)
    bracket = self.expression(exp.Table, this=bracket) if bracket else None

    rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
        self._parse_table
    )
    rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

    only = self._match(TokenType.ONLY)

    # The table name is only parsed when neither a bracket nor ROWS FROM matched.
    this = t.cast(
        exp.Expression,
        bracket
        or rows_from
        or self._parse_bracket(
            self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
        ),
    )

    if only:
        this.set("only", only)

    # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
    self._match_text_seq("*")

    parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
    # Lookahead only: `_parse_partition` consumes the PARTITION token itself.
    if parse_partition and self._match(TokenType.PARTITION, advance=False):
        this.set("partition", self._parse_partition())

    if schema:
        return self._parse_schema(this=this)

    version = self._parse_version()

    if version:
        this.set("version", version)

    # The sample clause is parsed either before the alias (here) or after the
    # hints and pivots (below), depending on the dialect flag.
    if self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
    if alias:
        this.set("alias", alias)

    if isinstance(this, exp.Table) and self._match_text_seq("AT"):
        return self.expression(
            exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
        )

    this.set("hints", self._parse_table_hints())

    if not this.args.get("pivots"):
        this.set("pivots", self._parse_pivots())

    if not self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    if joins:
        for join in self._parse_joins():
            this.append("joins", join)

    # WITH ORDINALITY re-parses the alias, replacing any alias set above.
    if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
        this.set("ordinality", True)
        this.set("alias", self._parse_table_alias())

    return this
def _parse_version(self) -> t.Optional[exp.Version]:
    """Parse a TIMESTAMP / VERSION snapshot clause into an `exp.Version`.

    Recognized forms after the snapshot token: `FROM|BETWEEN <start> TO|AND <end>`,
    `CONTAINED IN (...)`, `ALL`, and `[AS OF] <type>`, which is the fallback.
    Returns None when no snapshot token is present.
    """
    for snapshot_token, label in (
        (TokenType.TIMESTAMP_SNAPSHOT, "TIMESTAMP"),
        (TokenType.VERSION_SNAPSHOT, "VERSION"),
    ):
        if self._match(snapshot_token):
            this = label
            break
    else:
        return None

    expression: t.Optional[exp.Expression]

    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        kind = self._prev.text.upper()
        range_start = self._parse_bitwise()
        self._match_texts(("TO", "AND"))
        range_end = self._parse_bitwise()
        expression = self.expression(exp.Tuple, expressions=[range_start, range_end])
    elif self._match_text_seq("CONTAINED", "IN"):
        kind = "CONTAINED IN"
        members = self._parse_wrapped_csv(self._parse_bitwise)
        expression = self.expression(exp.Tuple, expressions=members)
    elif self._match(TokenType.ALL):
        kind = "ALL"
        expression = None
    else:
        # "AS OF" is optional here; the kind is recorded as AS OF either way.
        self._match_text_seq("AS", "OF")
        kind = "AS OF"
        expression = self._parse_type()

    return self.expression(exp.Version, this=this, expression=expression, kind=kind)
4032 end = self._parse_bitwise() 4033 expression: t.Optional[exp.Expression] = self.expression( 4034 exp.Tuple, expressions=[start, end] 4035 ) 4036 elif self._match_text_seq("CONTAINED", "IN"): 4037 kind = "CONTAINED IN" 4038 expression = self.expression( 4039 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4040 ) 4041 elif self._match(TokenType.ALL): 4042 kind = "ALL" 4043 expression = None 4044 else: 4045 self._match_text_seq("AS", "OF") 4046 kind = "AS OF" 4047 expression = self._parse_type() 4048 4049 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4050 4051 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4052 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4053 index = self._index 4054 historical_data = None 4055 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4056 this = self._prev.text.upper() 4057 kind = ( 4058 self._match(TokenType.L_PAREN) 4059 and self._match_texts(self.HISTORICAL_DATA_KIND) 4060 and self._prev.text.upper() 4061 ) 4062 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4063 4064 if expression: 4065 self._match_r_paren() 4066 historical_data = self.expression( 4067 exp.HistoricalData, this=this, kind=kind, expression=expression 4068 ) 4069 else: 4070 self._retreat(index) 4071 4072 return historical_data 4073 4074 def _parse_changes(self) -> t.Optional[exp.Changes]: 4075 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4076 return None 4077 4078 information = self._parse_var(any_token=True) 4079 self._match_r_paren() 4080 4081 return self.expression( 4082 exp.Changes, 4083 information=information, 4084 at_before=self._parse_historical_data(), 4085 end=self._parse_historical_data(), 4086 ) 4087 4088 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4089 if not self._match(TokenType.UNNEST): 4090 return None 4091 4092 expressions = self._parse_wrapped_csv(self._parse_equality) 4093 
def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
    """Parse a `VALUES ...` table source, optionally wrapped in parentheses.

    Returns:
        An `exp.Values`, or None when the input starts with neither `(VALUES`,
        `VALUES` nor `FORMAT VALUES`.
    """
    is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
    if not is_derived and not (
        # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
        self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
    ):
        return None

    expressions = self._parse_csv(self._parse_value)
    # The alias may come before the closing parenthesis...
    alias = self._parse_table_alias()

    if is_derived:
        self._match_r_paren()

    # ...or after it, which is only tried when none was found inside.
    return self.expression(
        exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
    )


def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a TABLESAMPLE clause (or `USING SAMPLE` when `as_modifier` is True).

    Args:
        as_modifier: Also accept the `USING SAMPLE` spelling.

    Returns:
        An `exp.TableSample`, or None when no sample clause starts here.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        # BUCKET <numerator> OUT OF <denominator> [ON <field>]
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        size = num
    else:
        # Bare number in a dialect where TABLESAMPLE_SIZE_IS_PERCENT is set.
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    if self._match(TokenType.L_PAREN):
        # Trailing `(<method> [, <seed>])` form; overrides any leading method.
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample,
        expressions=expressions,
        method=method,
        bucket_numerator=bucket_numerator,
        bucket_denominator=bucket_denominator,
        bucket_field=bucket_field,
        percent=percent,
        size=size,
        seed=seed,
    )
self.TABLESAMPLE_CSV: 4152 num = None 4153 expressions = self._parse_csv(self._parse_primary) 4154 else: 4155 expressions = None 4156 num = ( 4157 self._parse_factor() 4158 if self._match(TokenType.NUMBER, advance=False) 4159 else self._parse_primary() or self._parse_placeholder() 4160 ) 4161 4162 if self._match_text_seq("BUCKET"): 4163 bucket_numerator = self._parse_number() 4164 self._match_text_seq("OUT", "OF") 4165 bucket_denominator = bucket_denominator = self._parse_number() 4166 self._match(TokenType.ON) 4167 bucket_field = self._parse_field() 4168 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4169 percent = num 4170 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4171 size = num 4172 else: 4173 percent = num 4174 4175 if matched_l_paren: 4176 self._match_r_paren() 4177 4178 if self._match(TokenType.L_PAREN): 4179 method = self._parse_var(upper=True) 4180 seed = self._match(TokenType.COMMA) and self._parse_number() 4181 self._match_r_paren() 4182 elif self._match_texts(("SEED", "REPEATABLE")): 4183 seed = self._parse_wrapped(self._parse_number) 4184 4185 if not method and self.DEFAULT_SAMPLING_METHOD: 4186 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4187 4188 return self.expression( 4189 exp.TableSample, 4190 expressions=expressions, 4191 method=method, 4192 bucket_numerator=bucket_numerator, 4193 bucket_denominator=bucket_denominator, 4194 bucket_field=bucket_field, 4195 percent=percent, 4196 size=size, 4197 seed=seed, 4198 ) 4199 4200 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4201 return list(iter(self._parse_pivot, None)) or None 4202 4203 def _parse_joins(self) -> t.Iterator[exp.Join]: 4204 return iter(self._parse_join, None) 4205 4206 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4207 if not self._match(TokenType.INTO): 4208 return None 4209 4210 return self.expression( 4211 exp.UnpivotColumns, 4212 this=self._match_text_seq("NAME") and self._parse_column(), 4213 
# https://duckdb.org/docs/sql/statements/pivot
def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
    """Parse the statement-style `<table> [ON ...] [INTO ...] [USING ...] [GROUP BY ...]` pivot.

    Args:
        is_unpivot: Stored as the `unpivot` arg of the resulting `exp.Pivot`.
    """

    def _parse_on() -> t.Optional[exp.Expression]:
        # One entry of the ON list: an expression, optionally followed by
        # IN (...) or an alias.
        this = self._parse_bitwise()

        if self._match(TokenType.IN):
            # PIVOT ... ON col IN (row_val1, row_val2)
            return self._parse_in(this)
        if self._match(TokenType.ALIAS, advance=False):
            # UNPIVOT ... ON (col1, col2, col3) AS row_val
            return self._parse_alias(this)

        return this

    this = self._parse_table()
    expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
    into = self._parse_unpivot_columns()
    using = self._match(TokenType.USING) and self._parse_csv(
        lambda: self._parse_alias(self._parse_function())
    )
    group = self._parse_group()

    return self.expression(
        exp.Pivot,
        this=this,
        expressions=expressions,
        using=using,
        group=group,
        unpivot=is_unpivot,
        into=into,
    )


def _parse_pivot_in(self) -> exp.In:
    """Parse `<column> IN (...)` inside a PIVOT/UNPIVOT FOR clause.

    The parenthesized list is either `ANY [ORDER BY ...]` or comma-separated
    values with optional aliases.
    """

    def _parse_aliased_expression() -> t.Optional[exp.Expression]:
        this = self._parse_select_or_expression()

        # The AS keyword is optional; an alias is attempted either way.
        self._match(TokenType.ALIAS)
        alias = self._parse_bitwise()
        if alias:
            # An alias parsed as a column with no `db` is reduced to its inner node.
            if isinstance(alias, exp.Column) and not alias.db:
                alias = alias.this
            return self.expression(exp.PivotAlias, this=this, alias=alias)

        return this

    value = self._parse_column()

    if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
        self.raise_error("Expecting IN (")

    if self._match(TokenType.ANY):
        exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
    else:
        exprs = self._parse_csv(_parse_aliased_expression)

    self._match_r_paren()
    return self.expression(exp.In, this=value, expressions=exprs)


def _parse_pivot(self) -> t.Optional[exp.Pivot]:
    """Parse a `PIVOT(...)` / `UNPIVOT(...)` table modifier.

    Returns:
        An `exp.Pivot`, or None when no PIVOT/UNPIVOT keyword is found, or, after
        rewinding to the starting index, when it is not followed by "(". For
        PIVOT, the names of the generated output columns are set on the node
        as `columns`.
    """
    index = self._index
    include_nulls = None

    if self._match(TokenType.PIVOT):
        unpivot = False
    elif self._match(TokenType.UNPIVOT):
        unpivot = True

        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
        if self._match_text_seq("INCLUDE", "NULLS"):
            include_nulls = True
        elif self._match_text_seq("EXCLUDE", "NULLS"):
            include_nulls = False
    else:
        return None

    expressions = []

    if not self._match(TokenType.L_PAREN):
        self._retreat(index)
        return None

    if unpivot:
        expressions = self._parse_csv(self._parse_column)
    else:
        expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

    if not expressions:
        self.raise_error("Failed to parse PIVOT's aggregation list")

    if not self._match(TokenType.FOR):
        self.raise_error("Expecting FOR")

    # Collect `<column> IN (...)` fields until one fails to parse.
    fields = []
    while True:
        field = self._try_parse(self._parse_pivot_in)
        if not field:
            break
        fields.append(field)

    default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
        self._parse_bitwise
    )

    group = self._parse_group()

    self._match_r_paren()

    pivot = self.expression(
        exp.Pivot,
        expressions=expressions,
        fields=fields,
        unpivot=unpivot,
        include_nulls=include_nulls,
        default_on_null=default_on_null,
        group=group,
    )

    # When another PIVOT/UNPIVOT follows directly, no alias is parsed here.
    if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
        pivot.set("alias", self._parse_table_alias())

    if not unpivot:
        names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

        columns: t.List[exp.Expression] = []
        all_fields = []
        for pivot_field in pivot.fields:
            pivot_field_expressions = pivot_field.expressions

            # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
            if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                continue

            all_fields.append(
                [
                    fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                    for fld in pivot_field_expressions
                ]
            )

        if all_fields:
            if names:
                all_fields.append(names)

            # Generate all possible combinations of the pivot columns
            # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
            # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
            for fld_parts_tuple in itertools.product(*all_fields):
                fld_parts = list(fld_parts_tuple)

                if names and self.PREFIXED_PIVOT_COLUMNS:
                    # Move the "name" to the front of the list
                    fld_parts.insert(0, fld_parts.pop(-1))

                columns.append(exp.to_identifier("_".join(fld_parts)))

        pivot.set("columns", columns)

    return pivot
ORDER BY <column>`; we can't infer in this case. 4345 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4346 continue 4347 4348 all_fields.append( 4349 [ 4350 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4351 for fld in pivot_field_expressions 4352 ] 4353 ) 4354 4355 if all_fields: 4356 if names: 4357 all_fields.append(names) 4358 4359 # Generate all possible combinations of the pivot columns 4360 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4361 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4362 for fld_parts_tuple in itertools.product(*all_fields): 4363 fld_parts = list(fld_parts_tuple) 4364 4365 if names and self.PREFIXED_PIVOT_COLUMNS: 4366 # Move the "name" to the front of the list 4367 fld_parts.insert(0, fld_parts.pop(-1)) 4368 4369 columns.append(exp.to_identifier("_".join(fld_parts))) 4370 4371 pivot.set("columns", columns) 4372 4373 return pivot 4374 4375 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4376 return [agg.alias for agg in aggregations if agg.alias] 4377 4378 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4379 if not skip_where_token and not self._match(TokenType.PREWHERE): 4380 return None 4381 4382 return self.expression( 4383 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4384 ) 4385 4386 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4387 if not skip_where_token and not self._match(TokenType.WHERE): 4388 return None 4389 4390 return self.expression( 4391 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4392 ) 4393 4394 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4395 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4396 return None 4397 4398 elements: t.Dict[str, t.Any] = defaultdict(list) 4399 4400 if 
self._match(TokenType.ALL): 4401 elements["all"] = True 4402 elif self._match(TokenType.DISTINCT): 4403 elements["all"] = False 4404 4405 while True: 4406 index = self._index 4407 4408 elements["expressions"].extend( 4409 self._parse_csv( 4410 lambda: None 4411 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4412 else self._parse_assignment() 4413 ) 4414 ) 4415 4416 before_with_index = self._index 4417 with_prefix = self._match(TokenType.WITH) 4418 4419 if self._match(TokenType.ROLLUP): 4420 elements["rollup"].append( 4421 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4422 ) 4423 elif self._match(TokenType.CUBE): 4424 elements["cube"].append( 4425 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4426 ) 4427 elif self._match(TokenType.GROUPING_SETS): 4428 elements["grouping_sets"].append( 4429 self.expression( 4430 exp.GroupingSets, 4431 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4432 ) 4433 ) 4434 elif self._match_text_seq("TOTALS"): 4435 elements["totals"] = True # type: ignore 4436 4437 if before_with_index <= self._index <= before_with_index + 1: 4438 self._retreat(before_with_index) 4439 break 4440 4441 if index == self._index: 4442 break 4443 4444 return self.expression(exp.Group, **elements) # type: ignore 4445 4446 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4447 return self.expression( 4448 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4449 ) 4450 4451 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4452 if self._match(TokenType.L_PAREN): 4453 grouping_set = self._parse_csv(self._parse_column) 4454 self._match_r_paren() 4455 return self.expression(exp.Tuple, expressions=grouping_set) 4456 4457 return self._parse_column() 4458 4459 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4460 if not skip_having_token and not self._match(TokenType.HAVING): 4461 return None 
4462 return self.expression(exp.Having, this=self._parse_assignment()) 4463 4464 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4465 if not self._match(TokenType.QUALIFY): 4466 return None 4467 return self.expression(exp.Qualify, this=self._parse_assignment()) 4468 4469 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4470 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4471 exp.Prior, this=self._parse_bitwise() 4472 ) 4473 connect = self._parse_assignment() 4474 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4475 return connect 4476 4477 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4478 if skip_start_token: 4479 start = None 4480 elif self._match(TokenType.START_WITH): 4481 start = self._parse_assignment() 4482 else: 4483 return None 4484 4485 self._match(TokenType.CONNECT_BY) 4486 nocycle = self._match_text_seq("NOCYCLE") 4487 connect = self._parse_connect_with_prior() 4488 4489 if not start and self._match(TokenType.START_WITH): 4490 start = self._parse_assignment() 4491 4492 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4493 4494 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4495 this = self._parse_id_var(any_token=True) 4496 if self._match(TokenType.ALIAS): 4497 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4498 return this 4499 4500 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4501 if self._match_text_seq("INTERPOLATE"): 4502 return self._parse_wrapped_csv(self._parse_name_as_expression) 4503 return None 4504 4505 def _parse_order( 4506 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4507 ) -> t.Optional[exp.Expression]: 4508 siblings = None 4509 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4510 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4511 return this 4512 4513 siblings = True 4514 4515 return 
self.expression( 4516 exp.Order, 4517 this=this, 4518 expressions=self._parse_csv(self._parse_ordered), 4519 siblings=siblings, 4520 ) 4521 4522 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4523 if not self._match(token): 4524 return None 4525 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4526 4527 def _parse_ordered( 4528 self, parse_method: t.Optional[t.Callable] = None 4529 ) -> t.Optional[exp.Ordered]: 4530 this = parse_method() if parse_method else self._parse_assignment() 4531 if not this: 4532 return None 4533 4534 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4535 this = exp.var("ALL") 4536 4537 asc = self._match(TokenType.ASC) 4538 desc = self._match(TokenType.DESC) or (asc and False) 4539 4540 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4541 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4542 4543 nulls_first = is_nulls_first or False 4544 explicitly_null_ordered = is_nulls_first or is_nulls_last 4545 4546 if ( 4547 not explicitly_null_ordered 4548 and ( 4549 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4550 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4551 ) 4552 and self.dialect.NULL_ORDERING != "nulls_are_last" 4553 ): 4554 nulls_first = True 4555 4556 if self._match_text_seq("WITH", "FILL"): 4557 with_fill = self.expression( 4558 exp.WithFill, 4559 **{ # type: ignore 4560 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4561 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4562 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4563 "interpolate": self._parse_interpolate(), 4564 }, 4565 ) 4566 else: 4567 with_fill = None 4568 4569 return self.expression( 4570 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4571 ) 4572 4573 def _parse_limit_options(self) -> exp.LimitOptions: 4574 percent = self._match(TokenType.PERCENT) 4575 rows = 
def _parse_limit(
    self,
    this: t.Optional[exp.Expression] = None,
    top: bool = False,
    skip_limit_token: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a LIMIT/TOP clause or, failing that, a FETCH clause.

    Args:
        this: Expression the resulting `exp.Limit` is attached to; returned
            unchanged when no clause is found.
        top: Match the TOP token instead of LIMIT and parse TOP's operand form.
        skip_limit_token: Assume the LIMIT/TOP token has already been consumed.

    Returns:
        An `exp.Limit`, an `exp.Fetch`, or `this` when neither clause is present.
    """
    if not skip_limit_token and not self._match(TokenType.TOP if top else TokenType.LIMIT):
        if not self._match(TokenType.FETCH):
            return this

        if self._match_set((TokenType.FIRST, TokenType.NEXT)):
            direction = self._prev.text.upper()
        else:
            direction = "FIRST"

        count = self._parse_field(tokens=self.FETCH_TOKENS)
        fetch_options = self._parse_limit_options()

        return self.expression(
            exp.Fetch,
            direction=direction,
            count=count,
            limit_options=fetch_options,
        )

    comments = self._prev_comments
    limit_options = None

    if top:
        # TOP takes either a parenthesized term or a bare number
        if self._match(TokenType.L_PAREN):
            expression = self._parse_term()
            self._match_r_paren()
        else:
            expression = self._parse_number()

        limit_options = self._parse_limit_options()
    else:
        expression = self._parse_term()

    # In the `<a>, <b>` form the first value is the offset and the second the limit
    offset = None
    if self._match(TokenType.COMMA):
        offset = expression
        expression = self._parse_term()

    limit_by = self._parse_limit_by()

    return self.expression(
        exp.Limit,
        this=this,
        expression=expression,
        offset=offset,
        comments=comments,
        limit_options=limit_options,
        expressions=limit_by,
    )
expressions=self._parse_limit_by() 4642 ) 4643 4644 def _can_parse_limit_or_offset(self) -> bool: 4645 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4646 return False 4647 4648 index = self._index 4649 result = bool( 4650 self._try_parse(self._parse_limit, retreat=True) 4651 or self._try_parse(self._parse_offset, retreat=True) 4652 ) 4653 self._retreat(index) 4654 return result 4655 4656 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4657 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4658 4659 def _parse_locks(self) -> t.List[exp.Lock]: 4660 locks = [] 4661 while True: 4662 if self._match_text_seq("FOR", "UPDATE"): 4663 update = True 4664 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4665 "LOCK", "IN", "SHARE", "MODE" 4666 ): 4667 update = False 4668 else: 4669 break 4670 4671 expressions = None 4672 if self._match_text_seq("OF"): 4673 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4674 4675 wait: t.Optional[bool | exp.Expression] = None 4676 if self._match_text_seq("NOWAIT"): 4677 wait = True 4678 elif self._match_text_seq("WAIT"): 4679 wait = self._parse_primary() 4680 elif self._match_text_seq("SKIP", "LOCKED"): 4681 wait = False 4682 4683 locks.append( 4684 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4685 ) 4686 4687 return locks 4688 4689 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4690 start = self._index 4691 _, side_token, kind_token = self._parse_join_parts() 4692 4693 side = side_token.text if side_token else None 4694 kind = kind_token.text if kind_token else None 4695 4696 if not self._match_set(self.SET_OPERATIONS): 4697 self._retreat(start) 4698 return None 4699 4700 token_type = self._prev.token_type 4701 4702 if token_type == TokenType.UNION: 4703 operation: t.Type[exp.SetOperation] = exp.Union 4704 elif token_type == TokenType.EXCEPT: 4705 operation = 
def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Fold every set operation that follows `this` into a left-nested chain.

    `parse_set_operation` is applied repeatedly until it yields nothing. When
    `MODIFIERS_ATTACHED_TO_SET_OP` is set, the modifiers named in
    `SET_OP_MODIFIERS` are then moved from the right-hand operand onto the
    outermost set operation.

    Returns:
        The combined expression, or `this` unchanged when no set operation follows.
    """
    while this:
        combined = self.parse_set_operation(this)
        if not combined:
            break
        this = combined

    if not (isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP):
        return this

    right_operand = this.expression
    if not right_operand:
        return this

    for modifier_name in self.SET_OP_MODIFIERS:
        modifier = right_operand.args.get(modifier_name)
        if modifier:
            # pop() detaches the modifier from the operand before re-attaching it
            this.set(modifier_name, modifier.pop())

    return this
this = exp.column( 4772 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4773 ) 4774 4775 while self._match_set(self.ASSIGNMENT): 4776 if isinstance(this, exp.Column) and len(this.parts) == 1: 4777 this = this.this 4778 4779 this = self.expression( 4780 self.ASSIGNMENT[self._prev.token_type], 4781 this=this, 4782 comments=self._prev_comments, 4783 expression=self._parse_assignment(), 4784 ) 4785 4786 return this 4787 4788 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4789 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4790 4791 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4792 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4793 4794 def _parse_equality(self) -> t.Optional[exp.Expression]: 4795 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4796 4797 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4798 return self._parse_tokens(self._parse_range, self.COMPARISON) 4799 4800 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4801 this = this or self._parse_bitwise() 4802 negate = self._match(TokenType.NOT) 4803 4804 if self._match_set(self.RANGE_PARSERS): 4805 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4806 if not expression: 4807 return this 4808 4809 this = expression 4810 elif self._match(TokenType.ISNULL): 4811 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4812 4813 # Postgres supports ISNULL and NOTNULL for conditions. 
def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse what follows an already-consumed IS token.

    Handles `[NOT] DISTINCT FROM <expr>`, `[NOT] JSON ...` and
    `[NOT] <primary | null>`.

    Returns:
        The resulting predicate, or None (after retreating to the token before
        the current position on entry) when nothing valid follows.
    """
    # One token back from the entry position, i.e. the token consumed just before this call
    restore_index = self._index - 1
    negated = self._match(TokenType.NOT)

    if self._match_text_seq("DISTINCT", "FROM"):
        # IS NOT DISTINCT FROM maps to null-safe equality, IS DISTINCT FROM to its opposite
        klass = exp.NullSafeEQ if negated else exp.NullSafeNEQ
        return self.expression(klass, this=this, expression=self._parse_bitwise())

    expression: t.Optional[exp.Expression]

    if not self._match(TokenType.JSON):
        expression = self._parse_primary() or self._parse_null()
        if not expression:
            self._retreat(restore_index)
            return None
    else:
        kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

        if self._match_text_seq("WITH"):
            with_keys = True
        elif self._match_text_seq("WITHOUT"):
            with_keys = False
        else:
            with_keys = None

        unique = self._match(TokenType.UNIQUE)
        self._match_text_seq("KEYS")

        json_args = {"this": kind, "with": with_keys, "unique": unique}
        expression = self.expression(exp.JSON, **json_args)

    predicate = self.expression(exp.Is, this=this, expression=expression)
    if negated:
        return self.expression(exp.Not, this=predicate)
    return predicate
def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap `this` in an `exp.Escape` when an ESCAPE token follows; otherwise return it as is."""
    if self._match(TokenType.ESCAPE):
        escape_string = self._parse_string()
        return self.expression(exp.Escape, this=this, expression=escape_string)

    return this
4924 # each INTERVAL expression into this canonical form so it's easy to transpile 4925 if this and this.is_number: 4926 this = exp.Literal.string(this.to_py()) 4927 elif this and this.is_string: 4928 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4929 if parts and unit: 4930 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4931 unit = None 4932 self._retreat(self._index - 1) 4933 4934 if len(parts) == 1: 4935 this = exp.Literal.string(parts[0][0]) 4936 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4937 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4938 unit = self.expression( 4939 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4940 ) 4941 4942 interval = self.expression(exp.Interval, this=this, unit=unit) 4943 4944 index = self._index 4945 self._match(TokenType.PLUS) 4946 4947 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4948 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4949 return self.expression( 4950 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4951 ) 4952 4953 self._retreat(index) 4954 return interval 4955 4956 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4957 this = self._parse_term() 4958 4959 while True: 4960 if self._match_set(self.BITWISE): 4961 this = self.expression( 4962 self.BITWISE[self._prev.token_type], 4963 this=this, 4964 expression=self._parse_term(), 4965 ) 4966 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4967 this = self.expression( 4968 exp.DPipe, 4969 this=this, 4970 expression=self._parse_term(), 4971 safe=not self.dialect.STRICT_STRING_CONCAT, 4972 ) 4973 elif self._match(TokenType.DQMARK): 4974 this = self.expression( 4975 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4976 ) 4977 elif self._match_pair(TokenType.LT, TokenType.LT): 4978 this = self.expression( 4979 exp.BitwiseLeftShift, 
this=this, expression=self._parse_term() 4980 ) 4981 elif self._match_pair(TokenType.GT, TokenType.GT): 4982 this = self.expression( 4983 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4984 ) 4985 else: 4986 break 4987 4988 return this 4989 4990 def _parse_term(self) -> t.Optional[exp.Expression]: 4991 this = self._parse_factor() 4992 4993 while self._match_set(self.TERM): 4994 klass = self.TERM[self._prev.token_type] 4995 comments = self._prev_comments 4996 expression = self._parse_factor() 4997 4998 this = self.expression(klass, this=this, comments=comments, expression=expression) 4999 5000 if isinstance(this, exp.Collate): 5001 expr = this.expression 5002 5003 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5004 # fallback to Identifier / Var 5005 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5006 ident = expr.this 5007 if isinstance(ident, exp.Identifier): 5008 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5009 5010 return this 5011 5012 def _parse_factor(self) -> t.Optional[exp.Expression]: 5013 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5014 this = parse_method() 5015 5016 while self._match_set(self.FACTOR): 5017 klass = self.FACTOR[self._prev.token_type] 5018 comments = self._prev_comments 5019 expression = parse_method() 5020 5021 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5022 self._retreat(self._index - 1) 5023 return this 5024 5025 this = self.expression(klass, this=this, comments=comments, expression=expression) 5026 5027 if isinstance(this, exp.Div): 5028 this.args["typed"] = self.dialect.TYPED_DIVISION 5029 this.args["safe"] = self.dialect.SAFE_DIVISION 5030 5031 return this 5032 5033 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5034 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5035 5036 def _parse_unary(self) -> t.Optional[exp.Expression]: 5037 if 
self._match_set(self.UNARY_PARSERS): 5038 return self.UNARY_PARSERS[self._prev.token_type](self) 5039 return self._parse_at_time_zone(self._parse_type()) 5040 5041 def _parse_type( 5042 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5043 ) -> t.Optional[exp.Expression]: 5044 interval = parse_interval and self._parse_interval() 5045 if interval: 5046 return interval 5047 5048 index = self._index 5049 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5050 5051 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5052 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5053 if isinstance(data_type, exp.Cast): 5054 # This constructor can contain ops directly after it, for instance struct unnesting: 5055 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5056 return self._parse_column_ops(data_type) 5057 5058 if data_type: 5059 index2 = self._index 5060 this = self._parse_primary() 5061 5062 if isinstance(this, exp.Literal): 5063 this = self._parse_column_ops(this) 5064 5065 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5066 if parser: 5067 return parser(self, this, data_type) 5068 5069 return self.expression(exp.Cast, this=this, to=data_type) 5070 5071 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5072 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5073 # 5074 # If the index difference here is greater than 1, that means the parser itself must have 5075 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5076 # 5077 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5078 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5079 # callable in the TYPE_CONVERTERS mapping. 
For example, Snowflake converts DECIMAL to 5080 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5081 # 5082 # In these cases, we don't really want to return the converted type, but instead retreat 5083 # and try to parse a Column or Identifier in the section below. 5084 if data_type.expressions and index2 - index > 1: 5085 self._retreat(index2) 5086 return self._parse_column_ops(data_type) 5087 5088 self._retreat(index) 5089 5090 if fallback_to_identifier: 5091 return self._parse_id_var() 5092 5093 this = self._parse_column() 5094 return this and self._parse_column_ops(this) 5095 5096 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5097 this = self._parse_type() 5098 if not this: 5099 return None 5100 5101 if isinstance(this, exp.Column) and not this.table: 5102 this = exp.var(this.name.upper()) 5103 5104 return self.expression( 5105 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5106 ) 5107 5108 def _parse_types( 5109 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5110 ) -> t.Optional[exp.Expression]: 5111 index = self._index 5112 5113 this: t.Optional[exp.Expression] = None 5114 prefix = self._match_text_seq("SYSUDTLIB", ".") 5115 5116 if not self._match_set(self.TYPE_TOKENS): 5117 identifier = allow_identifiers and self._parse_id_var( 5118 any_token=False, tokens=(TokenType.VAR,) 5119 ) 5120 if isinstance(identifier, exp.Identifier): 5121 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5122 5123 if len(tokens) != 1: 5124 self.raise_error("Unexpected identifier", self._prev) 5125 5126 if tokens[0].token_type in self.TYPE_TOKENS: 5127 self._prev = tokens[0] 5128 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5129 type_name = identifier.name 5130 5131 while self._match(TokenType.DOT): 5132 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5133 5134 this = exp.DataType.build(type_name, udt=True) 5135 else: 5136 
self._retreat(self._index - 1) 5137 return None 5138 else: 5139 return None 5140 5141 type_token = self._prev.token_type 5142 5143 if type_token == TokenType.PSEUDO_TYPE: 5144 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5145 5146 if type_token == TokenType.OBJECT_IDENTIFIER: 5147 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5148 5149 # https://materialize.com/docs/sql/types/map/ 5150 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5151 key_type = self._parse_types( 5152 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5153 ) 5154 if not self._match(TokenType.FARROW): 5155 self._retreat(index) 5156 return None 5157 5158 value_type = self._parse_types( 5159 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5160 ) 5161 if not self._match(TokenType.R_BRACKET): 5162 self._retreat(index) 5163 return None 5164 5165 return exp.DataType( 5166 this=exp.DataType.Type.MAP, 5167 expressions=[key_type, value_type], 5168 nested=True, 5169 prefix=prefix, 5170 ) 5171 5172 nested = type_token in self.NESTED_TYPE_TOKENS 5173 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5174 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5175 expressions = None 5176 maybe_func = False 5177 5178 if self._match(TokenType.L_PAREN): 5179 if is_struct: 5180 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5181 elif nested: 5182 expressions = self._parse_csv( 5183 lambda: self._parse_types( 5184 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5185 ) 5186 ) 5187 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5188 this = expressions[0] 5189 this.set("nullable", True) 5190 self._match_r_paren() 5191 return this 5192 elif type_token in self.ENUM_TYPE_TOKENS: 5193 expressions = self._parse_csv(self._parse_equality) 5194 elif is_aggregate: 5195 func_or_ident = self._parse_function(anonymous=True) or 
self._parse_id_var( 5196 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5197 ) 5198 if not func_or_ident: 5199 return None 5200 expressions = [func_or_ident] 5201 if self._match(TokenType.COMMA): 5202 expressions.extend( 5203 self._parse_csv( 5204 lambda: self._parse_types( 5205 check_func=check_func, 5206 schema=schema, 5207 allow_identifiers=allow_identifiers, 5208 ) 5209 ) 5210 ) 5211 else: 5212 expressions = self._parse_csv(self._parse_type_size) 5213 5214 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5215 if type_token == TokenType.VECTOR and len(expressions) == 2: 5216 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5217 5218 if not expressions or not self._match(TokenType.R_PAREN): 5219 self._retreat(index) 5220 return None 5221 5222 maybe_func = True 5223 5224 values: t.Optional[t.List[exp.Expression]] = None 5225 5226 if nested and self._match(TokenType.LT): 5227 if is_struct: 5228 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5229 else: 5230 expressions = self._parse_csv( 5231 lambda: self._parse_types( 5232 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5233 ) 5234 ) 5235 5236 if not self._match(TokenType.GT): 5237 self.raise_error("Expecting >") 5238 5239 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5240 values = self._parse_csv(self._parse_assignment) 5241 if not values and is_struct: 5242 values = None 5243 self._retreat(self._index - 1) 5244 else: 5245 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5246 5247 if type_token in self.TIMESTAMPS: 5248 if self._match_text_seq("WITH", "TIME", "ZONE"): 5249 maybe_func = False 5250 tz_type = ( 5251 exp.DataType.Type.TIMETZ 5252 if type_token in self.TIMES 5253 else exp.DataType.Type.TIMESTAMPTZ 5254 ) 5255 this = exp.DataType(this=tz_type, expressions=expressions) 5256 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5257 maybe_func = False 5258 this 
= exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5259 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5260 maybe_func = False 5261 elif type_token == TokenType.INTERVAL: 5262 unit = self._parse_var(upper=True) 5263 if unit: 5264 if self._match_text_seq("TO"): 5265 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5266 5267 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5268 else: 5269 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5270 elif type_token == TokenType.VOID: 5271 this = exp.DataType(this=exp.DataType.Type.NULL) 5272 5273 if maybe_func and check_func: 5274 index2 = self._index 5275 peek = self._parse_string() 5276 5277 if not peek: 5278 self._retreat(index) 5279 return None 5280 5281 self._retreat(index2) 5282 5283 if not this: 5284 if self._match_text_seq("UNSIGNED"): 5285 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5286 if not unsigned_type_token: 5287 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5288 5289 type_token = unsigned_type_token or type_token 5290 5291 this = exp.DataType( 5292 this=exp.DataType.Type[type_token.value], 5293 expressions=expressions, 5294 nested=nested, 5295 prefix=prefix, 5296 ) 5297 5298 # Empty arrays/structs are allowed 5299 if values is not None: 5300 cls = exp.Struct if is_struct else exp.Array 5301 this = exp.cast(cls(expressions=values), this, copy=False) 5302 5303 elif expressions: 5304 this.set("expressions", expressions) 5305 5306 # https://materialize.com/docs/sql/types/list/#type-name 5307 while self._match(TokenType.LIST): 5308 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5309 5310 index = self._index 5311 5312 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5313 matched_array = self._match(TokenType.ARRAY) 5314 5315 while self._curr: 5316 datatype_token = self._prev.token_type 5317 
matched_l_bracket = self._match(TokenType.L_BRACKET) 5318 5319 if (not matched_l_bracket and not matched_array) or ( 5320 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5321 ): 5322 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5323 # not to be confused with the fixed size array parsing 5324 break 5325 5326 matched_array = False 5327 values = self._parse_csv(self._parse_assignment) or None 5328 if ( 5329 values 5330 and not schema 5331 and ( 5332 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5333 ) 5334 ): 5335 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5336 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5337 self._retreat(index) 5338 break 5339 5340 this = exp.DataType( 5341 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5342 ) 5343 self._match(TokenType.R_BRACKET) 5344 5345 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5346 converter = self.TYPE_CONVERTERS.get(this.this) 5347 if converter: 5348 this = converter(t.cast(exp.DataType, this)) 5349 5350 return this 5351 5352 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5353 index = self._index 5354 5355 if ( 5356 self._curr 5357 and self._next 5358 and self._curr.token_type in self.TYPE_TOKENS 5359 and self._next.token_type in self.TYPE_TOKENS 5360 ): 5361 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5362 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5363 this = self._parse_id_var() 5364 else: 5365 this = ( 5366 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5367 or self._parse_id_var() 5368 ) 5369 5370 self._match(TokenType.COLON) 5371 5372 if ( 5373 type_required 5374 and not isinstance(this, exp.DataType) 5375 and not self._match_set(self.TYPE_TOKENS, advance=False) 5376 ): 5377 self._retreat(index) 5378 return self._parse_types() 5379 5380 return self._parse_column_def(this) 5381 5382 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5383 if not self._match_text_seq("AT", "TIME", "ZONE"): 5384 return this 5385 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5386 5387 def _parse_column(self) -> t.Optional[exp.Expression]: 5388 this = self._parse_column_reference() 5389 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5390 5391 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5392 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5393 5394 return column 5395 5396 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5397 this = self._parse_field() 5398 if ( 5399 not this 5400 and self._match(TokenType.VALUES, advance=False) 5401 and self.VALUES_FOLLOWED_BY_PAREN 5402 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5403 ): 5404 this = self._parse_id_var() 5405 5406 if isinstance(this, exp.Identifier): 5407 # We bubble up comments from the Identifier to the Column 5408 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5409 5410 return this 5411 5412 def _parse_colon_as_variant_extract( 5413 self, this: t.Optional[exp.Expression] 5414 ) -> t.Optional[exp.Expression]: 5415 casts = [] 5416 json_path = [] 5417 escape = None 5418 5419 while self._match(TokenType.COLON): 5420 start_index = self._index 5421 5422 # Snowflake allows reserved keywords as 
json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5423 path = self._parse_column_ops( 5424 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5425 ) 5426 5427 # The cast :: operator has a lower precedence than the extraction operator :, so 5428 # we rearrange the AST appropriately to avoid casting the JSON path 5429 while isinstance(path, exp.Cast): 5430 casts.append(path.to) 5431 path = path.this 5432 5433 if casts: 5434 dcolon_offset = next( 5435 i 5436 for i, t in enumerate(self._tokens[start_index:]) 5437 if t.token_type == TokenType.DCOLON 5438 ) 5439 end_token = self._tokens[start_index + dcolon_offset - 1] 5440 else: 5441 end_token = self._prev 5442 5443 if path: 5444 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5445 # it'll roundtrip to a string literal in GET_PATH 5446 if isinstance(path, exp.Identifier) and path.quoted: 5447 escape = True 5448 5449 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5450 5451 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5452 # Databricks transforms it back to the colon/dot notation 5453 if json_path: 5454 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5455 5456 if json_path_expr: 5457 json_path_expr.set("escape", escape) 5458 5459 this = self.expression( 5460 exp.JSONExtract, 5461 this=this, 5462 expression=json_path_expr, 5463 variant_extract=True, 5464 ) 5465 5466 while casts: 5467 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5468 5469 return this 5470 5471 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5472 return self._parse_types() 5473 5474 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5475 this = self._parse_bracket(this) 5476 5477 while self._match_set(self.COLUMN_OPERATORS): 5478 op_token = self._prev.token_type 5479 op = 
def _parse_primary(self) -> t.Optional[exp.Expression]:
    """Parse a primary expression.

    Three forms are handled, in this order:
    - a token with an entry in `PRIMARY_PARSERS` (consecutive string tokens
      are combined into one `exp.Concat`);
    - a number written with a leading dot, which becomes `0.<digits>`;
    - a parenthesized query, tuple or expression.

    Returns:
        The parsed expression, or None when none of the forms applies.
    """
    if self._match_set(self.PRIMARY_PARSERS):
        token_type = self._prev.token_type
        primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

        if token_type != TokenType.STRING:
            return primary

        # Adjacent string literals are collected and concatenated
        parts = [primary]
        while self._match(TokenType.STRING):
            parts.append(exp.Literal.string(self._prev.text))

        if len(parts) == 1:
            return primary

        return self.expression(exp.Concat, expressions=parts)

    if self._match_pair(TokenType.DOT, TokenType.NUMBER):
        return exp.Literal.number(f"0.{self._prev.text}")

    if not self._match(TokenType.L_PAREN):
        return None

    comments = self._prev_comments
    query = self._parse_select()
    expressions = [query] if query else self._parse_expressions()

    this = self._parse_query_modifiers(seq_get(expressions, 0))

    if not this and self._match(TokenType.R_PAREN, advance=False):
        # Nothing between the parentheses: an empty tuple
        this = self.expression(exp.Tuple)
    elif isinstance(this, exp.UNWRAPPED_QUERIES):
        this = self._parse_subquery(this=this, parse_alias=False)
    elif isinstance(this, exp.Subquery):
        with_set_ops = self._parse_set_operations(this)
        this = self._parse_subquery(this=with_set_ops, parse_alias=False)
    elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
        # Several items, or a single item followed by a trailing comma
        this = self.expression(exp.Tuple, expressions=expressions)
    else:
        this = self.expression(exp.Paren, this=this)

    if this:
        this.add_comments(comments)

    self._match_r_paren(expression=this)
    return this
def _parse_function_call(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
    any_token: bool = False,
) -> t.Optional[exp.Expression]:
    """
    Parse a function invocation that starts at the current token.

    Args:
        functions: Mapping from upper-cased function name to a builder callable.
            Falls back to `self.FUNCTIONS` when omitted.
        anonymous: When True, dedicated function parsers and builders are skipped
            and the call is represented as `exp.Anonymous`.
        optional_parens: Whether the parenthesis-free forms registered in
            `NO_PAREN_FUNCTION_PARSERS` / `NO_PAREN_FUNCTIONS` are accepted.
        any_token: When True, every token outside `RESERVED_TOKENS` may act as the
            function name; otherwise the token must be in `FUNC_TOKENS`.

    Returns:
        The parsed expression, or None if no function call starts at the current token.
    """
    if not self._curr:
        return None

    comments = self._curr.comments
    token = self._curr
    token_type = self._curr.token_type
    this = self._curr.text
    upper = this.upper()

    parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
    if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
        self._advance()
        return self._parse_window(parser(self))

    if not self._next or self._next.token_type != TokenType.L_PAREN:
        if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
            self._advance()
            return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

        return None

    if any_token:
        if token_type in self.RESERVED_TOKENS:
            return None
    elif token_type not in self.FUNC_TOKENS:
        return None

    # Consume the function name and the opening parenthesis
    self._advance(2)

    parser = self.FUNCTION_PARSERS.get(upper)
    if parser and not anonymous:
        this = parser(self)
    else:
        subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

        # `self._curr` is None when the input ends right after "(", so it has to be
        # checked before its token type is inspected. Falling through lets the regular
        # path below report the problem through the parser's error handling.
        if (
            subquery_predicate
            and self._curr
            and self._curr.token_type in (TokenType.SELECT, TokenType.WITH)
        ):
            this = self.expression(
                subquery_predicate, comments=comments, this=self._parse_select()
            )
            self._match_r_paren()
            return this

        if functions is None:
            functions = self.FUNCTIONS

        function = functions.get(upper)
        known_function = function and not anonymous

        alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
        args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

        post_func_comments = self._curr and self._curr.comments
        if known_function and post_func_comments:
            # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
            # call we'll construct it as exp.Anonymous, even if it's "known"
            if any(
                comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                for comment in post_func_comments
            ):
                known_function = False

        if alias and known_function:
            args = self._kv_to_prop_eq(args)

        if known_function:
            func_builder = t.cast(t.Callable, function)

            # Builders that declare a `dialect` variable receive the active dialect
            if "dialect" in func_builder.__code__.co_varnames:
                func = func_builder(args, dialect=self.dialect)
            else:
                func = func_builder(args)

            func = self.validate_expression(func, args)
            if self.dialect.PRESERVE_ORIGINAL_NAMES:
                func.meta["name"] = this

            this = func
        else:
            if token_type == TokenType.IDENTIFIER:
                this = exp.Identifier(this=this, quoted=True).update_positions(token)

            this = self.expression(exp.Anonymous, this=this, expressions=args)
            this = this.update_positions(token)

    if isinstance(this, exp.Expression):
        this.add_comments(comments)

    self._match_r_paren(this)
    return self._parse_window(this)
def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """
    Parse a lambda, falling back to a regular function argument.

    A parameter list (optionally parenthesized) is parsed speculatively. If one of
    the `LAMBDAS` tokens follows, the matching builder is invoked with the parameters.
    Otherwise the parser rewinds and parses a `DISTINCT ...` list or a select/expression,
    which is then passed through the null-handling, HAVING MAX, ORDER and LIMIT parsers.

    Args:
        alias: Forwarded to `_parse_select_or_expression`.
    """
    start = self._index

    if not self._match(TokenType.L_PAREN):
        params = [self._parse_lambda_arg()]
    else:
        params = t.cast(
            t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
        )

        if not self._match(TokenType.R_PAREN):
            self._retreat(start)

    if self._match_set(self.LAMBDAS):
        build_lambda = self.LAMBDAS[self._prev.token_type]
        return build_lambda(self, params)

    # Not a lambda after all: undo the speculative parse
    self._retreat(start)

    body: t.Optional[exp.Expression]

    if self._match(TokenType.DISTINCT):
        distinct_items = self._parse_csv(self._parse_assignment)
        body = self.expression(exp.Distinct, expressions=distinct_items)
    else:
        body = self._parse_select_or_expression(alias=alias)

    body = self._parse_respect_or_ignore_nulls(body)
    body = self._parse_having_max(body)
    body = self._parse_order(body)
    return self._parse_limit(body)
def _parse_column_def(
    self, this: t.Optional[exp.Expression], computed_column: bool = True
) -> t.Optional[exp.Expression]:
    """
    Parse the remainder of a column definition whose name has already been parsed.

    Args:
        this: The already parsed column name; an `exp.Column` is unwrapped to its
            inner node.
        computed_column: When False, a leading ALIAS token is consumed up front, so
            the computed-column branch guarded by `not kind and ALIAS` cannot be
            entered through that token.

    Returns:
        An `exp.ColumnDef`, or `this` unchanged when neither a type nor any
        constraint was found.
    """
    # column defs are not really columns, they're identifiers
    if isinstance(this, exp.Column):
        this = this.this

    if not computed_column:
        self._match(TokenType.ALIAS)

    kind = self._parse_types(schema=True)

    if self._match_text_seq("FOR", "ORDINALITY"):
        return self.expression(exp.ColumnDef, this=this, ordinality=True)

    constraints: t.List[exp.Expression] = []

    # Computed column: either no type followed by an ALIAS token, or the literal
    # words ALIAS / MATERIALIZED. `persisted` is derived from the matched word.
    if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
        ("ALIAS", "MATERIALIZED")
    ):
        persisted = self._prev.text.upper() == "MATERIALIZED"
        constraint_kind = exp.ComputedColumnConstraint(
            this=self._parse_assignment(),
            persisted=persisted or self._match_text_seq("PERSISTED"),
            not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
        )
        constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
    # Transform constraint: a type followed by an ALIAS token. When
    # WRAPPED_TRANSFORM_COLUMN_CONSTRAINT is set, the token after it must be "(".
    elif (
        kind
        and self._match(TokenType.ALIAS, advance=False)
        and (
            not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
            or (self._next and self._next.token_type == TokenType.L_PAREN)
        )
    ):
        # The ALIAS token was only peeked at above, so consume it now
        self._advance()
        constraints.append(
            self.expression(
                exp.ColumnConstraint,
                kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
            )
        )

    # Collect every remaining column constraint
    while True:
        constraint = self._parse_column_constraint()
        if not constraint:
            break
        constraints.append(constraint)

    # Nothing besides the name was parsed, so this isn't a column definition
    if not kind and not constraints:
        return this

    return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
    """
    Parse an inline-length column constraint.

    The word LENGTH is optional; the value is parsed with `_parse_bitwise`.
    """
    self._match_text_seq("LENGTH")
    length = self._parse_bitwise()
    return self.expression(exp.InlineLengthColumnConstraint, this=length)
def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
    """
    Collect consecutive unnamed constraints.

    Each item is an unnamed constraint or, failing that, a function call.
    Collection stops at the first position where neither can be parsed.
    """

    def next_constraint() -> t.Optional[exp.Expression]:
        return self._parse_unnamed_constraint() or self._parse_function()

    collected = []
    parsed = next_constraint()
    while parsed:
        collected.append(parsed)
        parsed = next_constraint()

    return collected
def _parse_foreign_key(self) -> exp.ForeignKey:
    """
    Parse a FOREIGN KEY constraint body.

    Consumes an optional wrapped list of key columns (skipped when REFERENCES
    comes next), the reference itself, any `ON {DELETE | UPDATE} <action>` clauses
    and trailing key constraint options.

    Returns:
        An `exp.ForeignKey`. Each `ON <kind> <action>` clause is passed as a keyword
        argument named after the lower-cased kind (e.g. `delete="CASCADE"`).
    """
    expressions = (
        self._parse_wrapped_id_vars()
        if not self._match(TokenType.REFERENCES, advance=False)
        else None
    )
    reference = self._parse_references()
    on_options = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        kind = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            # Without this check a malformed clause would silently produce "SET SET",
            # because `_prev` would still point at the SET token.
            if not self._match_set((TokenType.NULL, TokenType.DEFAULT)):
                self.raise_error("Expected NULL or DEFAULT after SET")
            action = "SET " + self._prev.text.upper()
        else:
            # Any other single-token action (e.g. CASCADE, RESTRICT) is taken verbatim
            self._advance()
            action = self._prev.text.upper()

        on_options[kind] = action

    return self.expression(
        exp.ForeignKey,
        expressions=expressions,
        reference=reference,
        options=self._parse_key_constraint_options(),
        **on_options,  # type: ignore
    )
options=options) 6176 6177 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6178 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6179 6180 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6181 """ 6182 Parses a datetime column in ODBC format. We parse the column into the corresponding 6183 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6184 same as we did for `DATE('yyyy-mm-dd')`. 6185 6186 Reference: 6187 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6188 """ 6189 self._match(TokenType.VAR) 6190 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6191 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6192 if not self._match(TokenType.R_BRACE): 6193 self.raise_error("Expected }") 6194 return expression 6195 6196 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6197 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6198 return this 6199 6200 bracket_kind = self._prev.token_type 6201 if ( 6202 bracket_kind == TokenType.L_BRACE 6203 and self._curr 6204 and self._curr.token_type == TokenType.VAR 6205 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6206 ): 6207 return self._parse_odbc_datetime_literal() 6208 6209 expressions = self._parse_csv( 6210 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6211 ) 6212 6213 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6214 self.raise_error("Expected ]") 6215 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6216 self.raise_error("Expected }") 6217 6218 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6219 if bracket_kind == TokenType.L_BRACE: 6220 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6221 
def _parse_case(self) -> t.Optional[exp.Expression]:
    """
    Parse a CASE expression; the CASE keyword itself is already consumed.

    Returns:
        An `exp.Case` holding the optional operand, one `exp.If` per WHEN branch
        and the optional ELSE value.
    """
    case_comments = self._prev_comments
    operand = self._parse_assignment()

    branches = []
    while self._match(TokenType.WHEN):
        condition = self._parse_assignment()
        self._match(TokenType.THEN)
        outcome = self._parse_assignment()
        branches.append(self.expression(exp.If, this=condition, true=outcome))

    default = self._parse_assignment() if self._match(TokenType.ELSE) else None

    if not self._match(TokenType.END):
        # If the ELSE value came out as an Interval whose operand renders as END,
        # treat it as a column named "interval" followed by the closing END.
        swallowed_end = (
            isinstance(default, exp.Interval) and default.this.sql().upper() == "END"
        )
        if swallowed_end:
            default = exp.column("interval")
        else:
            self.raise_error("Expected END after CASE", self._prev)

    return self.expression(
        exp.Case, comments=case_comments, this=operand, ifs=branches, default=default
    )
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """
    Parse the arguments of EXTRACT, separated by either FROM or a comma.

    The first argument is a function call or, failing that, an upper-cased
    var/string. An error is raised when neither separator follows it.
    """
    part = self._parse_function() or self._parse_var_or_string(upper=True)

    has_separator = self._match(TokenType.FROM) or self._match(TokenType.COMMA)
    if not has_separator:
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    return self.expression(exp.Extract, this=part, expression=self._parse_bitwise())
def _parse_string_agg(self) -> exp.GroupConcat:
    """
    Parse the arguments of a STRING_AGG / LISTAGG style call into `exp.GroupConcat`.

    Handles an optional leading DISTINCT, an optional `ON OVERFLOW` clause, an
    ORDER BY (and LIMIT) written inside the parentheses, and a trailing
    `WITHIN GROUP (ORDER BY ...)`.

    Returns:
        An `exp.GroupConcat` whose `this` carries the ordering, when one was given.
    """
    if self._match(TokenType.DISTINCT):
        # Only the first argument is wrapped in Distinct; the rest are parsed as-is
        args: t.List[t.Optional[exp.Expression]] = [
            self.expression(exp.Distinct, expressions=[self._parse_assignment()])
        ]
        if self._match(TokenType.COMMA):
            args.extend(self._parse_csv(self._parse_assignment))
    else:
        args = self._parse_csv(self._parse_assignment)  # type: ignore

    if self._match_text_seq("ON", "OVERFLOW"):
        # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
        if self._match_text_seq("ERROR"):
            on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
        else:
            self._match_text_seq("TRUNCATE")
            on_overflow = self.expression(
                exp.OverflowTruncateBehavior,
                this=self._parse_string(),
                # with_count is falsy only when WITHOUT COUNT is spelled out
                with_count=(
                    self._match_text_seq("WITH", "COUNT")
                    or not self._match_text_seq("WITHOUT", "COUNT")
                ),
            )
    else:
        on_overflow = None

    # Remember the position so the ")" consumed below can be handed back
    index = self._index
    if not self._match(TokenType.R_PAREN) and args:
        # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
        # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
        # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
        # NOTE(review): `on_overflow` is not forwarded on this return path - confirm that is intended
        args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
        return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

    # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
    # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
    # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
    if not self._match_text_seq("WITHIN", "GROUP"):
        # Rewind to `index`, undoing the R_PAREN match above
        # NOTE(review): `on_overflow` is not forwarded on this return path either - confirm
        self._retreat(index)
        return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

    # The corresponding match_r_paren will be called in parse_function (caller)
    self._match_l_paren()

    return self.expression(
        exp.GroupConcat,
        this=self._parse_order(this=seq_get(args, 0)),
        separator=seq_get(args, 1),
        on_overflow=on_overflow,
    )
6500 else: 6501 uri = self._parse_alias(self._parse_string()) 6502 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6503 if not self._match(TokenType.COMMA): 6504 break 6505 6506 return namespaces 6507 6508 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6509 """ 6510 There are generally two variants of the DECODE function: 6511 6512 - DECODE(bin, charset) 6513 - DECODE(expression, search, result [, search, result] ... [, default]) 6514 6515 The second variant will always be parsed into a CASE expression. Note that NULL 6516 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6517 instead of relying on pattern matching. 6518 """ 6519 args = self._parse_csv(self._parse_assignment) 6520 6521 if len(args) < 3: 6522 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6523 6524 expression, *expressions = args 6525 if not expression: 6526 return None 6527 6528 ifs = [] 6529 for search, result in zip(expressions[::2], expressions[1::2]): 6530 if not search or not result: 6531 return None 6532 6533 if isinstance(search, exp.Literal): 6534 ifs.append( 6535 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6536 ) 6537 elif isinstance(search, exp.Null): 6538 ifs.append( 6539 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6540 ) 6541 else: 6542 cond = exp.or_( 6543 exp.EQ(this=expression.copy(), expression=search), 6544 exp.and_( 6545 exp.Is(this=expression.copy(), expression=exp.Null()), 6546 exp.Is(this=search.copy(), expression=exp.Null()), 6547 copy=False, 6548 ), 6549 copy=False, 6550 ) 6551 ifs.append(exp.If(this=cond, true=result)) 6552 6553 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6554 6555 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6556 self._match_text_seq("KEY") 6557 key = self._parse_column() 6558 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 
6559 self._match_text_seq("VALUE") 6560 value = self._parse_bitwise() 6561 6562 if not key and not value: 6563 return None 6564 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6565 6566 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6567 if not this or not self._match_text_seq("FORMAT", "JSON"): 6568 return this 6569 6570 return self.expression(exp.FormatJson, this=this) 6571 6572 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6573 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6574 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6575 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6576 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6577 else: 6578 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6579 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6580 6581 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6582 6583 if not empty and not error and not null: 6584 return None 6585 6586 return self.expression( 6587 exp.OnCondition, 6588 empty=empty, 6589 error=error, 6590 null=null, 6591 ) 6592 6593 def _parse_on_handling( 6594 self, on: str, *values: str 6595 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6596 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6597 for value in values: 6598 if self._match_text_seq(value, "ON", on): 6599 return f"{value} ON {on}" 6600 6601 index = self._index 6602 if self._match(TokenType.DEFAULT): 6603 default_value = self._parse_bitwise() 6604 if self._match_text_seq("ON", on): 6605 return default_value 6606 6607 self._retreat(index) 6608 6609 return None 6610 6611 @t.overload 6612 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6613 6614 @t.overload 6615 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6616 6617 def _parse_json_object(self, agg=False): 6618 star = self._parse_star() 6619 expressions = ( 6620 [star] 6621 if star 6622 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6623 ) 6624 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6625 6626 unique_keys = None 6627 if self._match_text_seq("WITH", "UNIQUE"): 6628 unique_keys = True 6629 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6630 unique_keys = False 6631 6632 self._match_text_seq("KEYS") 6633 6634 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6635 self._parse_type() 6636 ) 6637 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6638 6639 return self.expression( 6640 exp.JSONObjectAgg if agg else exp.JSONObject, 6641 expressions=expressions, 6642 null_handling=null_handling, 6643 unique_keys=unique_keys, 6644 return_type=return_type, 6645 encoding=encoding, 6646 ) 6647 6648 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6649 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6650 if not self._match_text_seq("NESTED"): 6651 this = self._parse_id_var() 6652 kind = self._parse_types(allow_identifiers=False) 6653 nested = None 6654 else: 6655 this = None 6656 kind = None 6657 nested = True 6658 6659 path = self._match_text_seq("PATH") and self._parse_string() 6660 nested_schema = nested and self._parse_json_schema() 6661 6662 return self.expression( 6663 exp.JSONColumnDef, 6664 this=this, 6665 kind=kind, 6666 path=path, 6667 nested_schema=nested_schema, 6668 ) 6669 6670 def _parse_json_schema(self) -> exp.JSONSchema: 6671 self._match_text_seq("COLUMNS") 6672 return self.expression( 6673 exp.JSONSchema, 6674 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6675 ) 6676 6677 def _parse_json_table(self) -> exp.JSONTable: 6678 this = self._parse_format_json(self._parse_bitwise()) 6679 path = self._match(TokenType.COMMA) and 
self._parse_string() 6680 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6681 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6682 schema = self._parse_json_schema() 6683 6684 return exp.JSONTable( 6685 this=this, 6686 schema=schema, 6687 path=path, 6688 error_handling=error_handling, 6689 empty_handling=empty_handling, 6690 ) 6691 6692 def _parse_match_against(self) -> exp.MatchAgainst: 6693 expressions = self._parse_csv(self._parse_column) 6694 6695 self._match_text_seq(")", "AGAINST", "(") 6696 6697 this = self._parse_string() 6698 6699 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6700 modifier = "IN NATURAL LANGUAGE MODE" 6701 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6702 modifier = f"{modifier} WITH QUERY EXPANSION" 6703 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6704 modifier = "IN BOOLEAN MODE" 6705 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6706 modifier = "WITH QUERY EXPANSION" 6707 else: 6708 modifier = None 6709 6710 return self.expression( 6711 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6712 ) 6713 6714 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6715 def _parse_open_json(self) -> exp.OpenJSON: 6716 this = self._parse_bitwise() 6717 path = self._match(TokenType.COMMA) and self._parse_string() 6718 6719 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6720 this = self._parse_field(any_token=True) 6721 kind = self._parse_types() 6722 path = self._parse_string() 6723 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6724 6725 return self.expression( 6726 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6727 ) 6728 6729 expressions = None 6730 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6731 self._match_l_paren() 6732 expressions = self._parse_csv(_parse_open_json_column_def) 6733 6734 return self.expression(exp.OpenJSON, 
this=this, path=path, expressions=expressions) 6735 6736 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6737 args = self._parse_csv(self._parse_bitwise) 6738 6739 if self._match(TokenType.IN): 6740 return self.expression( 6741 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6742 ) 6743 6744 if haystack_first: 6745 haystack = seq_get(args, 0) 6746 needle = seq_get(args, 1) 6747 else: 6748 haystack = seq_get(args, 1) 6749 needle = seq_get(args, 0) 6750 6751 return self.expression( 6752 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6753 ) 6754 6755 def _parse_predict(self) -> exp.Predict: 6756 self._match_text_seq("MODEL") 6757 this = self._parse_table() 6758 6759 self._match(TokenType.COMMA) 6760 self._match_text_seq("TABLE") 6761 6762 return self.expression( 6763 exp.Predict, 6764 this=this, 6765 expression=self._parse_table(), 6766 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6767 ) 6768 6769 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6770 args = self._parse_csv(self._parse_table) 6771 return exp.JoinHint(this=func_name.upper(), expressions=args) 6772 6773 def _parse_substring(self) -> exp.Substring: 6774 # Postgres supports the form: substring(string [from int] [for int]) 6775 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6776 6777 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6778 6779 if self._match(TokenType.FROM): 6780 args.append(self._parse_bitwise()) 6781 if self._match(TokenType.FOR): 6782 if len(args) == 1: 6783 args.append(exp.Literal.number(1)) 6784 args.append(self._parse_bitwise()) 6785 6786 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6787 6788 def _parse_trim(self) -> exp.Trim: 6789 # https://www.w3resource.com/sql/character-functions/trim.php 6790 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6791 6792 position = None 
6793 collation = None 6794 expression = None 6795 6796 if self._match_texts(self.TRIM_TYPES): 6797 position = self._prev.text.upper() 6798 6799 this = self._parse_bitwise() 6800 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6801 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6802 expression = self._parse_bitwise() 6803 6804 if invert_order: 6805 this, expression = expression, this 6806 6807 if self._match(TokenType.COLLATE): 6808 collation = self._parse_bitwise() 6809 6810 return self.expression( 6811 exp.Trim, this=this, position=position, expression=expression, collation=collation 6812 ) 6813 6814 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6815 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6816 6817 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6818 return self._parse_window(self._parse_id_var(), alias=True) 6819 6820 def _parse_respect_or_ignore_nulls( 6821 self, this: t.Optional[exp.Expression] 6822 ) -> t.Optional[exp.Expression]: 6823 if self._match_text_seq("IGNORE", "NULLS"): 6824 return self.expression(exp.IgnoreNulls, this=this) 6825 if self._match_text_seq("RESPECT", "NULLS"): 6826 return self.expression(exp.RespectNulls, this=this) 6827 return this 6828 6829 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6830 if self._match(TokenType.HAVING): 6831 self._match_texts(("MAX", "MIN")) 6832 max = self._prev.text.upper() != "MIN" 6833 return self.expression( 6834 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6835 ) 6836 6837 return this 6838 6839 def _parse_window( 6840 self, this: t.Optional[exp.Expression], alias: bool = False 6841 ) -> t.Optional[exp.Expression]: 6842 func = this 6843 comments = func.comments if isinstance(func, exp.Expression) else None 6844 6845 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
6846 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6847 if self._match_text_seq("WITHIN", "GROUP"): 6848 order = self._parse_wrapped(self._parse_order) 6849 this = self.expression(exp.WithinGroup, this=this, expression=order) 6850 6851 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6852 self._match(TokenType.WHERE) 6853 this = self.expression( 6854 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6855 ) 6856 self._match_r_paren() 6857 6858 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6859 # Some dialects choose to implement and some do not. 6860 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6861 6862 # There is some code above in _parse_lambda that handles 6863 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6864 6865 # The below changes handle 6866 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6867 6868 # Oracle allows both formats 6869 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6870 # and Snowflake chose to do the same for familiarity 6871 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6872 if isinstance(this, exp.AggFunc): 6873 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6874 6875 if ignore_respect and ignore_respect is not this: 6876 ignore_respect.replace(ignore_respect.this) 6877 this = self.expression(ignore_respect.__class__, this=this) 6878 6879 this = self._parse_respect_or_ignore_nulls(this) 6880 6881 # bigquery select from window x AS (partition by ...) 
6882 if alias: 6883 over = None 6884 self._match(TokenType.ALIAS) 6885 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6886 return this 6887 else: 6888 over = self._prev.text.upper() 6889 6890 if comments and isinstance(func, exp.Expression): 6891 func.pop_comments() 6892 6893 if not self._match(TokenType.L_PAREN): 6894 return self.expression( 6895 exp.Window, 6896 comments=comments, 6897 this=this, 6898 alias=self._parse_id_var(False), 6899 over=over, 6900 ) 6901 6902 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6903 6904 first = self._match(TokenType.FIRST) 6905 if self._match_text_seq("LAST"): 6906 first = False 6907 6908 partition, order = self._parse_partition_and_order() 6909 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6910 6911 if kind: 6912 self._match(TokenType.BETWEEN) 6913 start = self._parse_window_spec() 6914 self._match(TokenType.AND) 6915 end = self._parse_window_spec() 6916 exclude = ( 6917 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6918 if self._match_text_seq("EXCLUDE") 6919 else None 6920 ) 6921 6922 spec = self.expression( 6923 exp.WindowSpec, 6924 kind=kind, 6925 start=start["value"], 6926 start_side=start["side"], 6927 end=end["value"], 6928 end_side=end["side"], 6929 exclude=exclude, 6930 ) 6931 else: 6932 spec = None 6933 6934 self._match_r_paren() 6935 6936 window = self.expression( 6937 exp.Window, 6938 comments=comments, 6939 this=this, 6940 partition_by=partition, 6941 order=order, 6942 spec=spec, 6943 alias=window_alias, 6944 over=over, 6945 first=first, 6946 ) 6947 6948 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
6949 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6950 return self._parse_window(window, alias=alias) 6951 6952 return window 6953 6954 def _parse_partition_and_order( 6955 self, 6956 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6957 return self._parse_partition_by(), self._parse_order() 6958 6959 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6960 self._match(TokenType.BETWEEN) 6961 6962 return { 6963 "value": ( 6964 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6965 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6966 or self._parse_bitwise() 6967 ), 6968 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6969 } 6970 6971 def _parse_alias( 6972 self, this: t.Optional[exp.Expression], explicit: bool = False 6973 ) -> t.Optional[exp.Expression]: 6974 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6975 # so this section tries to parse the clause version and if it fails, it treats the token 6976 # as an identifier (alias) 6977 if self._can_parse_limit_or_offset(): 6978 return this 6979 6980 any_token = self._match(TokenType.ALIAS) 6981 comments = self._prev_comments or [] 6982 6983 if explicit and not any_token: 6984 return this 6985 6986 if self._match(TokenType.L_PAREN): 6987 aliases = self.expression( 6988 exp.Aliases, 6989 comments=comments, 6990 this=this, 6991 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6992 ) 6993 self._match_r_paren(aliases) 6994 return aliases 6995 6996 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6997 self.STRING_ALIASES and self._parse_string_as_identifier() 6998 ) 6999 7000 if alias: 7001 comments.extend(alias.pop_comments()) 7002 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7003 column = this.this 7004 7005 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7006 if not this.comments and 
column and column.comments: 7007 this.comments = column.pop_comments() 7008 7009 return this 7010 7011 def _parse_id_var( 7012 self, 7013 any_token: bool = True, 7014 tokens: t.Optional[t.Collection[TokenType]] = None, 7015 ) -> t.Optional[exp.Expression]: 7016 expression = self._parse_identifier() 7017 if not expression and ( 7018 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7019 ): 7020 quoted = self._prev.token_type == TokenType.STRING 7021 expression = self._identifier_expression(quoted=quoted) 7022 7023 return expression 7024 7025 def _parse_string(self) -> t.Optional[exp.Expression]: 7026 if self._match_set(self.STRING_PARSERS): 7027 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7028 return self._parse_placeholder() 7029 7030 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7031 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7032 if output: 7033 output.update_positions(self._prev) 7034 return output 7035 7036 def _parse_number(self) -> t.Optional[exp.Expression]: 7037 if self._match_set(self.NUMERIC_PARSERS): 7038 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7039 return self._parse_placeholder() 7040 7041 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7042 if self._match(TokenType.IDENTIFIER): 7043 return self._identifier_expression(quoted=True) 7044 return self._parse_placeholder() 7045 7046 def _parse_var( 7047 self, 7048 any_token: bool = False, 7049 tokens: t.Optional[t.Collection[TokenType]] = None, 7050 upper: bool = False, 7051 ) -> t.Optional[exp.Expression]: 7052 if ( 7053 (any_token and self._advance_any()) 7054 or self._match(TokenType.VAR) 7055 or (self._match_set(tokens) if tokens else False) 7056 ): 7057 return self.expression( 7058 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7059 ) 7060 return self._parse_placeholder() 7061 7062 def _advance_any(self, 
ignore_reserved: bool = False) -> t.Optional[Token]: 7063 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7064 self._advance() 7065 return self._prev 7066 return None 7067 7068 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7069 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7070 7071 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7072 return self._parse_primary() or self._parse_var(any_token=True) 7073 7074 def _parse_null(self) -> t.Optional[exp.Expression]: 7075 if self._match_set(self.NULL_TOKENS): 7076 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7077 return self._parse_placeholder() 7078 7079 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7080 if self._match(TokenType.TRUE): 7081 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7082 if self._match(TokenType.FALSE): 7083 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7084 return self._parse_placeholder() 7085 7086 def _parse_star(self) -> t.Optional[exp.Expression]: 7087 if self._match(TokenType.STAR): 7088 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7089 return self._parse_placeholder() 7090 7091 def _parse_parameter(self) -> exp.Parameter: 7092 this = self._parse_identifier() or self._parse_primary_or_var() 7093 return self.expression(exp.Parameter, this=this) 7094 7095 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7096 if self._match_set(self.PLACEHOLDER_PARSERS): 7097 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7098 if placeholder: 7099 return placeholder 7100 self._advance(-1) 7101 return None 7102 7103 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7104 if not self._match_texts(keywords): 7105 return None 7106 if self._match(TokenType.L_PAREN, advance=False): 7107 return self._parse_wrapped_csv(self._parse_expression) 7108 7109 expression = 
self._parse_expression() 7110 return [expression] if expression else None 7111 7112 def _parse_csv( 7113 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7114 ) -> t.List[exp.Expression]: 7115 parse_result = parse_method() 7116 items = [parse_result] if parse_result is not None else [] 7117 7118 while self._match(sep): 7119 self._add_comments(parse_result) 7120 parse_result = parse_method() 7121 if parse_result is not None: 7122 items.append(parse_result) 7123 7124 return items 7125 7126 def _parse_tokens( 7127 self, parse_method: t.Callable, expressions: t.Dict 7128 ) -> t.Optional[exp.Expression]: 7129 this = parse_method() 7130 7131 while self._match_set(expressions): 7132 this = self.expression( 7133 expressions[self._prev.token_type], 7134 this=this, 7135 comments=self._prev_comments, 7136 expression=parse_method(), 7137 ) 7138 7139 return this 7140 7141 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7142 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7143 7144 def _parse_wrapped_csv( 7145 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7146 ) -> t.List[exp.Expression]: 7147 return self._parse_wrapped( 7148 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7149 ) 7150 7151 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7152 wrapped = self._match(TokenType.L_PAREN) 7153 if not wrapped and not optional: 7154 self.raise_error("Expecting (") 7155 parse_result = parse_method() 7156 if wrapped: 7157 self._match_r_paren() 7158 return parse_result 7159 7160 def _parse_expressions(self) -> t.List[exp.Expression]: 7161 return self._parse_csv(self._parse_expression) 7162 7163 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7164 return self._parse_select() or self._parse_set_operations( 7165 self._parse_alias(self._parse_assignment(), explicit=True) 7166 if alias 
7167 else self._parse_assignment() 7168 ) 7169 7170 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7171 return self._parse_query_modifiers( 7172 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7173 ) 7174 7175 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7176 this = None 7177 if self._match_texts(self.TRANSACTION_KIND): 7178 this = self._prev.text 7179 7180 self._match_texts(("TRANSACTION", "WORK")) 7181 7182 modes = [] 7183 while True: 7184 mode = [] 7185 while self._match(TokenType.VAR): 7186 mode.append(self._prev.text) 7187 7188 if mode: 7189 modes.append(" ".join(mode)) 7190 if not self._match(TokenType.COMMA): 7191 break 7192 7193 return self.expression(exp.Transaction, this=this, modes=modes) 7194 7195 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7196 chain = None 7197 savepoint = None 7198 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7199 7200 self._match_texts(("TRANSACTION", "WORK")) 7201 7202 if self._match_text_seq("TO"): 7203 self._match_text_seq("SAVEPOINT") 7204 savepoint = self._parse_id_var() 7205 7206 if self._match(TokenType.AND): 7207 chain = not self._match_text_seq("NO") 7208 self._match_text_seq("CHAIN") 7209 7210 if is_rollback: 7211 return self.expression(exp.Rollback, savepoint=savepoint) 7212 7213 return self.expression(exp.Commit, chain=chain) 7214 7215 def _parse_refresh(self) -> exp.Refresh: 7216 self._match(TokenType.TABLE) 7217 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7218 7219 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7220 if not self._match_text_seq("ADD"): 7221 return None 7222 7223 self._match(TokenType.COLUMN) 7224 exists_column = self._parse_exists(not_=True) 7225 expression = self._parse_field_def() 7226 7227 if expression: 7228 expression.set("exists", exists_column) 7229 7230 # 
https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7231 if self._match_texts(("FIRST", "AFTER")): 7232 position = self._prev.text 7233 column_position = self.expression( 7234 exp.ColumnPosition, this=self._parse_column(), position=position 7235 ) 7236 expression.set("position", column_position) 7237 7238 return expression 7239 7240 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7241 drop = self._match(TokenType.DROP) and self._parse_drop() 7242 if drop and not isinstance(drop, exp.Command): 7243 drop.set("kind", drop.args.get("kind", "COLUMN")) 7244 return drop 7245 7246 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7247 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7248 return self.expression( 7249 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7250 ) 7251 7252 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7253 index = self._index - 1 7254 7255 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7256 return self._parse_csv( 7257 lambda: self.expression( 7258 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7259 ) 7260 ) 7261 7262 self._retreat(index) 7263 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7264 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7265 7266 if self._match_text_seq("ADD", "COLUMNS"): 7267 schema = self._parse_schema() 7268 if schema: 7269 return [schema] 7270 return [] 7271 7272 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7273 7274 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7275 if self._match_texts(self.ALTER_ALTER_PARSERS): 7276 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7277 7278 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7279 # keyword after ALTER we default to parsing 
this statement 7280 self._match(TokenType.COLUMN) 7281 column = self._parse_field(any_token=True) 7282 7283 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7284 return self.expression(exp.AlterColumn, this=column, drop=True) 7285 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7286 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7287 if self._match(TokenType.COMMENT): 7288 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7289 if self._match_text_seq("DROP", "NOT", "NULL"): 7290 return self.expression( 7291 exp.AlterColumn, 7292 this=column, 7293 drop=True, 7294 allow_null=True, 7295 ) 7296 if self._match_text_seq("SET", "NOT", "NULL"): 7297 return self.expression( 7298 exp.AlterColumn, 7299 this=column, 7300 allow_null=False, 7301 ) 7302 7303 if self._match_text_seq("SET", "VISIBLE"): 7304 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7305 if self._match_text_seq("SET", "INVISIBLE"): 7306 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7307 7308 self._match_text_seq("SET", "DATA") 7309 self._match_text_seq("TYPE") 7310 return self.expression( 7311 exp.AlterColumn, 7312 this=column, 7313 dtype=self._parse_types(), 7314 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7315 using=self._match(TokenType.USING) and self._parse_assignment(), 7316 ) 7317 7318 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7319 if self._match_texts(("ALL", "EVEN", "AUTO")): 7320 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7321 7322 self._match_text_seq("KEY", "DISTKEY") 7323 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7324 7325 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7326 if compound: 7327 self._match_text_seq("SORTKEY") 7328 7329 if self._match(TokenType.L_PAREN, advance=False): 7330 return self.expression( 7331 
exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7332 ) 7333 7334 self._match_texts(("AUTO", "NONE")) 7335 return self.expression( 7336 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7337 ) 7338 7339 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7340 index = self._index - 1 7341 7342 partition_exists = self._parse_exists() 7343 if self._match(TokenType.PARTITION, advance=False): 7344 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7345 7346 self._retreat(index) 7347 return self._parse_csv(self._parse_drop_column) 7348 7349 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7350 if self._match(TokenType.COLUMN): 7351 exists = self._parse_exists() 7352 old_column = self._parse_column() 7353 to = self._match_text_seq("TO") 7354 new_column = self._parse_column() 7355 7356 if old_column is None or to is None or new_column is None: 7357 return None 7358 7359 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7360 7361 self._match_text_seq("TO") 7362 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7363 7364 def _parse_alter_table_set(self) -> exp.AlterSet: 7365 alter_set = self.expression(exp.AlterSet) 7366 7367 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7368 "TABLE", "PROPERTIES" 7369 ): 7370 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7371 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7372 alter_set.set("expressions", [self._parse_assignment()]) 7373 elif self._match_texts(("LOGGED", "UNLOGGED")): 7374 alter_set.set("option", exp.var(self._prev.text.upper())) 7375 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7376 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7377 elif self._match_text_seq("LOCATION"): 7378 
alter_set.set("location", self._parse_field()) 7379 elif self._match_text_seq("ACCESS", "METHOD"): 7380 alter_set.set("access_method", self._parse_field()) 7381 elif self._match_text_seq("TABLESPACE"): 7382 alter_set.set("tablespace", self._parse_field()) 7383 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7384 alter_set.set("file_format", [self._parse_field()]) 7385 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7386 alter_set.set("file_format", self._parse_wrapped_options()) 7387 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7388 alter_set.set("copy_options", self._parse_wrapped_options()) 7389 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7390 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7391 else: 7392 if self._match_text_seq("SERDE"): 7393 alter_set.set("serde", self._parse_field()) 7394 7395 alter_set.set("expressions", [self._parse_properties()]) 7396 7397 return alter_set 7398 7399 def _parse_alter(self) -> exp.Alter | exp.Command: 7400 start = self._prev 7401 7402 alter_token = self._match_set(self.ALTERABLES) and self._prev 7403 if not alter_token: 7404 return self._parse_as_command(start) 7405 7406 exists = self._parse_exists() 7407 only = self._match_text_seq("ONLY") 7408 this = self._parse_table(schema=True) 7409 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7410 7411 if self._next: 7412 self._advance() 7413 7414 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7415 if parser: 7416 actions = ensure_list(parser(self)) 7417 not_valid = self._match_text_seq("NOT", "VALID") 7418 options = self._parse_csv(self._parse_property) 7419 7420 if not self._curr and actions: 7421 return self.expression( 7422 exp.Alter, 7423 this=this, 7424 kind=alter_token.text.upper(), 7425 exists=exists, 7426 actions=actions, 7427 only=only, 7428 options=options, 7429 cluster=cluster, 7430 not_valid=not_valid, 7431 ) 7432 7433 return 
self._parse_as_command(start) 7434 7435 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7436 start = self._prev 7437 # https://duckdb.org/docs/sql/statements/analyze 7438 if not self._curr: 7439 return self.expression(exp.Analyze) 7440 7441 options = [] 7442 while self._match_texts(self.ANALYZE_STYLES): 7443 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7444 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7445 else: 7446 options.append(self._prev.text.upper()) 7447 7448 this: t.Optional[exp.Expression] = None 7449 inner_expression: t.Optional[exp.Expression] = None 7450 7451 kind = self._curr and self._curr.text.upper() 7452 7453 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7454 this = self._parse_table_parts() 7455 elif self._match_text_seq("TABLES"): 7456 if self._match_set((TokenType.FROM, TokenType.IN)): 7457 kind = f"{kind} {self._prev.text.upper()}" 7458 this = self._parse_table(schema=True, is_db_reference=True) 7459 elif self._match_text_seq("DATABASE"): 7460 this = self._parse_table(schema=True, is_db_reference=True) 7461 elif self._match_text_seq("CLUSTER"): 7462 this = self._parse_table() 7463 # Try matching inner expr keywords before fallback to parse table. 
7464 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7465 kind = None 7466 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7467 else: 7468 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7469 kind = None 7470 this = self._parse_table_parts() 7471 7472 partition = self._try_parse(self._parse_partition) 7473 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7474 return self._parse_as_command(start) 7475 7476 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7477 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7478 "WITH", "ASYNC", "MODE" 7479 ): 7480 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7481 else: 7482 mode = None 7483 7484 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7485 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7486 7487 properties = self._parse_properties() 7488 return self.expression( 7489 exp.Analyze, 7490 kind=kind, 7491 this=this, 7492 mode=mode, 7493 partition=partition, 7494 properties=properties, 7495 expression=inner_expression, 7496 options=options, 7497 ) 7498 7499 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7500 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7501 this = None 7502 kind = self._prev.text.upper() 7503 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7504 expressions = [] 7505 7506 if not self._match_text_seq("STATISTICS"): 7507 self.raise_error("Expecting token STATISTICS") 7508 7509 if self._match_text_seq("NOSCAN"): 7510 this = "NOSCAN" 7511 elif self._match(TokenType.FOR): 7512 if self._match_text_seq("ALL", "COLUMNS"): 7513 this = "FOR ALL COLUMNS" 7514 if self._match_texts("COLUMNS"): 7515 this = "FOR COLUMNS" 7516 expressions = self._parse_csv(self._parse_column_reference) 7517 elif self._match_text_seq("SAMPLE"): 7518 sample = self._parse_number() 
7519 expressions = [ 7520 self.expression( 7521 exp.AnalyzeSample, 7522 sample=sample, 7523 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7524 ) 7525 ] 7526 7527 return self.expression( 7528 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7529 ) 7530 7531 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7532 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7533 kind = None 7534 this = None 7535 expression: t.Optional[exp.Expression] = None 7536 if self._match_text_seq("REF", "UPDATE"): 7537 kind = "REF" 7538 this = "UPDATE" 7539 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7540 this = "UPDATE SET DANGLING TO NULL" 7541 elif self._match_text_seq("STRUCTURE"): 7542 kind = "STRUCTURE" 7543 if self._match_text_seq("CASCADE", "FAST"): 7544 this = "CASCADE FAST" 7545 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7546 ("ONLINE", "OFFLINE") 7547 ): 7548 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7549 expression = self._parse_into() 7550 7551 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7552 7553 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7554 this = self._prev.text.upper() 7555 if self._match_text_seq("COLUMNS"): 7556 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7557 return None 7558 7559 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7560 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7561 if self._match_text_seq("STATISTICS"): 7562 return self.expression(exp.AnalyzeDelete, kind=kind) 7563 return None 7564 7565 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7566 if self._match_text_seq("CHAINED", "ROWS"): 7567 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7568 return None 7569 7570 # 
https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7571 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7572 this = self._prev.text.upper() 7573 expression: t.Optional[exp.Expression] = None 7574 expressions = [] 7575 update_options = None 7576 7577 if self._match_text_seq("HISTOGRAM", "ON"): 7578 expressions = self._parse_csv(self._parse_column_reference) 7579 with_expressions = [] 7580 while self._match(TokenType.WITH): 7581 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7582 if self._match_texts(("SYNC", "ASYNC")): 7583 if self._match_text_seq("MODE", advance=False): 7584 with_expressions.append(f"{self._prev.text.upper()} MODE") 7585 self._advance() 7586 else: 7587 buckets = self._parse_number() 7588 if self._match_text_seq("BUCKETS"): 7589 with_expressions.append(f"{buckets} BUCKETS") 7590 if with_expressions: 7591 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7592 7593 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7594 TokenType.UPDATE, advance=False 7595 ): 7596 update_options = self._prev.text.upper() 7597 self._advance() 7598 elif self._match_text_seq("USING", "DATA"): 7599 expression = self.expression(exp.UsingData, this=self._parse_string()) 7600 7601 return self.expression( 7602 exp.AnalyzeHistogram, 7603 this=this, 7604 expressions=expressions, 7605 expression=expression, 7606 update_options=update_options, 7607 ) 7608 7609 def _parse_merge(self) -> exp.Merge: 7610 self._match(TokenType.INTO) 7611 target = self._parse_table() 7612 7613 if target and self._match(TokenType.ALIAS, advance=False): 7614 target.set("alias", self._parse_table_alias()) 7615 7616 self._match(TokenType.USING) 7617 using = self._parse_table() 7618 7619 self._match(TokenType.ON) 7620 on = self._parse_assignment() 7621 7622 return self.expression( 7623 exp.Merge, 7624 this=target, 7625 using=using, 7626 on=on, 7627 whens=self._parse_when_matched(), 7628 
returning=self._parse_returning(), 7629 ) 7630 7631 def _parse_when_matched(self) -> exp.Whens: 7632 whens = [] 7633 7634 while self._match(TokenType.WHEN): 7635 matched = not self._match(TokenType.NOT) 7636 self._match_text_seq("MATCHED") 7637 source = ( 7638 False 7639 if self._match_text_seq("BY", "TARGET") 7640 else self._match_text_seq("BY", "SOURCE") 7641 ) 7642 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7643 7644 self._match(TokenType.THEN) 7645 7646 if self._match(TokenType.INSERT): 7647 this = self._parse_star() 7648 if this: 7649 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7650 else: 7651 then = self.expression( 7652 exp.Insert, 7653 this=exp.var("ROW") 7654 if self._match_text_seq("ROW") 7655 else self._parse_value(values=False), 7656 expression=self._match_text_seq("VALUES") and self._parse_value(), 7657 ) 7658 elif self._match(TokenType.UPDATE): 7659 expressions = self._parse_star() 7660 if expressions: 7661 then = self.expression(exp.Update, expressions=expressions) 7662 else: 7663 then = self.expression( 7664 exp.Update, 7665 expressions=self._match(TokenType.SET) 7666 and self._parse_csv(self._parse_equality), 7667 ) 7668 elif self._match(TokenType.DELETE): 7669 then = self.expression(exp.Var, this=self._prev.text) 7670 else: 7671 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7672 7673 whens.append( 7674 self.expression( 7675 exp.When, 7676 matched=matched, 7677 source=source, 7678 condition=condition, 7679 then=then, 7680 ) 7681 ) 7682 return self.expression(exp.Whens, expressions=whens) 7683 7684 def _parse_show(self) -> t.Optional[exp.Expression]: 7685 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7686 if parser: 7687 return parser(self) 7688 return self._parse_as_command(self._prev) 7689 7690 def _parse_set_item_assignment( 7691 self, kind: t.Optional[str] = None 7692 ) -> t.Optional[exp.Expression]: 7693 index = self._index 7694 7695 if kind in 
("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7696 return self._parse_set_transaction(global_=kind == "GLOBAL") 7697 7698 left = self._parse_primary() or self._parse_column() 7699 assignment_delimiter = self._match_texts(("=", "TO")) 7700 7701 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7702 self._retreat(index) 7703 return None 7704 7705 right = self._parse_statement() or self._parse_id_var() 7706 if isinstance(right, (exp.Column, exp.Identifier)): 7707 right = exp.var(right.name) 7708 7709 this = self.expression(exp.EQ, this=left, expression=right) 7710 return self.expression(exp.SetItem, this=this, kind=kind) 7711 7712 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7713 self._match_text_seq("TRANSACTION") 7714 characteristics = self._parse_csv( 7715 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7716 ) 7717 return self.expression( 7718 exp.SetItem, 7719 expressions=characteristics, 7720 kind="TRANSACTION", 7721 **{"global": global_}, # type: ignore 7722 ) 7723 7724 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7725 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7726 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7727 7728 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7729 index = self._index 7730 set_ = self.expression( 7731 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7732 ) 7733 7734 if self._curr: 7735 self._retreat(index) 7736 return self._parse_as_command(self._prev) 7737 7738 return set_ 7739 7740 def _parse_var_from_options( 7741 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7742 ) -> t.Optional[exp.Var]: 7743 start = self._curr 7744 if not start: 7745 return None 7746 7747 option = start.text.upper() 7748 continuations = options.get(option) 7749 7750 index = self._index 7751 self._advance() 7752 for 
keywords in continuations or []: 7753 if isinstance(keywords, str): 7754 keywords = (keywords,) 7755 7756 if self._match_text_seq(*keywords): 7757 option = f"{option} {' '.join(keywords)}" 7758 break 7759 else: 7760 if continuations or continuations is None: 7761 if raise_unmatched: 7762 self.raise_error(f"Unknown option {option}") 7763 7764 self._retreat(index) 7765 return None 7766 7767 return exp.var(option) 7768 7769 def _parse_as_command(self, start: Token) -> exp.Command: 7770 while self._curr: 7771 self._advance() 7772 text = self._find_sql(start, self._prev) 7773 size = len(start.text) 7774 self._warn_unsupported() 7775 return exp.Command(this=text[:size], expression=text[size:]) 7776 7777 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7778 settings = [] 7779 7780 self._match_l_paren() 7781 kind = self._parse_id_var() 7782 7783 if self._match(TokenType.L_PAREN): 7784 while True: 7785 key = self._parse_id_var() 7786 value = self._parse_primary() 7787 if not key and value is None: 7788 break 7789 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7790 self._match(TokenType.R_PAREN) 7791 7792 self._match_r_paren() 7793 7794 return self.expression( 7795 exp.DictProperty, 7796 this=this, 7797 kind=kind.this if kind else None, 7798 settings=settings, 7799 ) 7800 7801 def _parse_dict_range(self, this: str) -> exp.DictRange: 7802 self._match_l_paren() 7803 has_min = self._match_text_seq("MIN") 7804 if has_min: 7805 min = self._parse_var() or self._parse_primary() 7806 self._match_text_seq("MAX") 7807 max = self._parse_var() or self._parse_primary() 7808 else: 7809 max = self._parse_var() or self._parse_primary() 7810 min = exp.Literal.number(0) 7811 self._match_r_paren() 7812 return self.expression(exp.DictRange, this=this, min=min, max=max) 7813 7814 def _parse_comprehension( 7815 self, this: t.Optional[exp.Expression] 7816 ) -> t.Optional[exp.Comprehension]: 7817 index = self._index 7818 expression = self._parse_column() 
7819 if not self._match(TokenType.IN): 7820 self._retreat(index - 1) 7821 return None 7822 iterator = self._parse_column() 7823 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7824 return self.expression( 7825 exp.Comprehension, 7826 this=this, 7827 expression=expression, 7828 iterator=iterator, 7829 condition=condition, 7830 ) 7831 7832 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7833 if self._match(TokenType.HEREDOC_STRING): 7834 return self.expression(exp.Heredoc, this=self._prev.text) 7835 7836 if not self._match_text_seq("$"): 7837 return None 7838 7839 tags = ["$"] 7840 tag_text = None 7841 7842 if self._is_connected(): 7843 self._advance() 7844 tags.append(self._prev.text.upper()) 7845 else: 7846 self.raise_error("No closing $ found") 7847 7848 if tags[-1] != "$": 7849 if self._is_connected() and self._match_text_seq("$"): 7850 tag_text = tags[-1] 7851 tags.append("$") 7852 else: 7853 self.raise_error("No closing $ found") 7854 7855 heredoc_start = self._curr 7856 7857 while self._curr: 7858 if self._match_text_seq(*tags, advance=False): 7859 this = self._find_sql(heredoc_start, self._prev) 7860 self._advance(len(tags)) 7861 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7862 7863 self._advance() 7864 7865 self.raise_error(f"No closing {''.join(tags)} found") 7866 return None 7867 7868 def _find_parser( 7869 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7870 ) -> t.Optional[t.Callable]: 7871 if not self._curr: 7872 return None 7873 7874 index = self._index 7875 this = [] 7876 while True: 7877 # The current token might be multiple words 7878 curr = self._curr.text.upper() 7879 key = curr.split(" ") 7880 this.append(curr) 7881 7882 self._advance() 7883 result, trie = in_trie(trie, key) 7884 if result == TrieResult.FAILED: 7885 break 7886 7887 if result == TrieResult.EXISTS: 7888 subparser = parsers[" ".join(this)] 7889 return subparser 7890 7891 self._retreat(index) 7892 return None 7893 7894 def 
_match(self, token_type, advance=True, expression=None): 7895 if not self._curr: 7896 return None 7897 7898 if self._curr.token_type == token_type: 7899 if advance: 7900 self._advance() 7901 self._add_comments(expression) 7902 return True 7903 7904 return None 7905 7906 def _match_set(self, types, advance=True): 7907 if not self._curr: 7908 return None 7909 7910 if self._curr.token_type in types: 7911 if advance: 7912 self._advance() 7913 return True 7914 7915 return None 7916 7917 def _match_pair(self, token_type_a, token_type_b, advance=True): 7918 if not self._curr or not self._next: 7919 return None 7920 7921 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7922 if advance: 7923 self._advance(2) 7924 return True 7925 7926 return None 7927 7928 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7929 if not self._match(TokenType.L_PAREN, expression=expression): 7930 self.raise_error("Expecting (") 7931 7932 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7933 if not self._match(TokenType.R_PAREN, expression=expression): 7934 self.raise_error("Expecting )") 7935 7936 def _match_texts(self, texts, advance=True): 7937 if ( 7938 self._curr 7939 and self._curr.token_type != TokenType.STRING 7940 and self._curr.text.upper() in texts 7941 ): 7942 if advance: 7943 self._advance() 7944 return True 7945 return None 7946 7947 def _match_text_seq(self, *texts, advance=True): 7948 index = self._index 7949 for text in texts: 7950 if ( 7951 self._curr 7952 and self._curr.token_type != TokenType.STRING 7953 and self._curr.text.upper() == text 7954 ): 7955 self._advance() 7956 else: 7957 self._retreat(index) 7958 return None 7959 7960 if not advance: 7961 self._retreat(index) 7962 7963 return True 7964 7965 def _replace_lambda( 7966 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7967 ) -> t.Optional[exp.Expression]: 7968 if not node: 7969 return node 7970 
7971 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7972 7973 for column in node.find_all(exp.Column): 7974 typ = lambda_types.get(column.parts[0].name) 7975 if typ is not None: 7976 dot_or_id = column.to_dot() if column.table else column.this 7977 7978 if typ: 7979 dot_or_id = self.expression( 7980 exp.Cast, 7981 this=dot_or_id, 7982 to=typ, 7983 ) 7984 7985 parent = column.parent 7986 7987 while isinstance(parent, exp.Dot): 7988 if not isinstance(parent.parent, exp.Dot): 7989 parent.replace(dot_or_id) 7990 break 7991 parent = parent.parent 7992 else: 7993 if column is node: 7994 node = dot_or_id 7995 else: 7996 column.replace(dot_or_id) 7997 return node 7998 7999 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8000 start = self._prev 8001 8002 # Not to be confused with TRUNCATE(number, decimals) function call 8003 if self._match(TokenType.L_PAREN): 8004 self._retreat(self._index - 2) 8005 return self._parse_function() 8006 8007 # Clickhouse supports TRUNCATE DATABASE as well 8008 is_database = self._match(TokenType.DATABASE) 8009 8010 self._match(TokenType.TABLE) 8011 8012 exists = self._parse_exists(not_=False) 8013 8014 expressions = self._parse_csv( 8015 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8016 ) 8017 8018 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8019 8020 if self._match_text_seq("RESTART", "IDENTITY"): 8021 identity = "RESTART" 8022 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8023 identity = "CONTINUE" 8024 else: 8025 identity = None 8026 8027 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8028 option = self._prev.text 8029 else: 8030 option = None 8031 8032 partition = self._parse_partition() 8033 8034 # Fallback case 8035 if self._curr: 8036 return self._parse_as_command(start) 8037 8038 return self.expression( 8039 exp.TruncateTable, 8040 expressions=expressions, 8041 is_database=is_database, 8042 
exists=exists, 8043 cluster=cluster, 8044 identity=identity, 8045 option=option, 8046 partition=partition, 8047 ) 8048 8049 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8050 this = self._parse_ordered(self._parse_opclass) 8051 8052 if not self._match(TokenType.WITH): 8053 return this 8054 8055 op = self._parse_var(any_token=True) 8056 8057 return self.expression(exp.WithOperator, this=this, op=op) 8058 8059 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8060 self._match(TokenType.EQ) 8061 self._match(TokenType.L_PAREN) 8062 8063 opts: t.List[t.Optional[exp.Expression]] = [] 8064 option: exp.Expression | None 8065 while self._curr and not self._match(TokenType.R_PAREN): 8066 if self._match_text_seq("FORMAT_NAME", "="): 8067 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8068 option = self._parse_format_name() 8069 else: 8070 option = self._parse_property() 8071 8072 if option is None: 8073 self.raise_error("Unable to parse option") 8074 break 8075 8076 opts.append(option) 8077 8078 return opts 8079 8080 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8081 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8082 8083 options = [] 8084 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8085 option = self._parse_var(any_token=True) 8086 prev = self._prev.text.upper() 8087 8088 # Different dialects might separate options and values by white space, "=" and "AS" 8089 self._match(TokenType.EQ) 8090 self._match(TokenType.ALIAS) 8091 8092 param = self.expression(exp.CopyParameter, this=option) 8093 8094 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8095 TokenType.L_PAREN, advance=False 8096 ): 8097 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8098 param.set("expressions", self._parse_wrapped_options()) 8099 elif prev == "FILE_FORMAT": 8100 # T-SQL's external file format case 8101 param.set("expression", self._parse_field()) 8102 
else: 8103 param.set("expression", self._parse_unquoted_field()) 8104 8105 options.append(param) 8106 self._match(sep) 8107 8108 return options 8109 8110 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8111 expr = self.expression(exp.Credentials) 8112 8113 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8114 expr.set("storage", self._parse_field()) 8115 if self._match_text_seq("CREDENTIALS"): 8116 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8117 creds = ( 8118 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8119 ) 8120 expr.set("credentials", creds) 8121 if self._match_text_seq("ENCRYPTION"): 8122 expr.set("encryption", self._parse_wrapped_options()) 8123 if self._match_text_seq("IAM_ROLE"): 8124 expr.set("iam_role", self._parse_field()) 8125 if self._match_text_seq("REGION"): 8126 expr.set("region", self._parse_field()) 8127 8128 return expr 8129 8130 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8131 return self._parse_field() 8132 8133 def _parse_copy(self) -> exp.Copy | exp.Command: 8134 start = self._prev 8135 8136 self._match(TokenType.INTO) 8137 8138 this = ( 8139 self._parse_select(nested=True, parse_subquery_alias=False) 8140 if self._match(TokenType.L_PAREN, advance=False) 8141 else self._parse_table(schema=True) 8142 ) 8143 8144 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8145 8146 files = self._parse_csv(self._parse_file_location) 8147 credentials = self._parse_credentials() 8148 8149 self._match_text_seq("WITH") 8150 8151 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8152 8153 # Fallback case 8154 if self._curr: 8155 return self._parse_as_command(start) 8156 8157 return self.expression( 8158 exp.Copy, 8159 this=this, 8160 kind=kind, 8161 credentials=credentials, 8162 files=files, 8163 params=params, 8164 ) 8165 8166 def _parse_normalize(self) -> exp.Normalize: 8167 return self.expression( 8168 
exp.Normalize, 8169 this=self._parse_bitwise(), 8170 form=self._match(TokenType.COMMA) and self._parse_var(), 8171 ) 8172 8173 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8174 args = self._parse_csv(lambda: self._parse_lambda()) 8175 8176 this = seq_get(args, 0) 8177 decimals = seq_get(args, 1) 8178 8179 return expr_type( 8180 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8181 ) 8182 8183 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8184 if self._match_text_seq("COLUMNS", "(", advance=False): 8185 this = self._parse_function() 8186 if isinstance(this, exp.Columns): 8187 this.set("unpack", True) 8188 return this 8189 8190 return self.expression( 8191 exp.Star, 8192 **{ # type: ignore 8193 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8194 "replace": self._parse_star_op("REPLACE"), 8195 "rename": self._parse_star_op("RENAME"), 8196 }, 8197 ) 8198 8199 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8200 privilege_parts = [] 8201 8202 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8203 # (end of privilege list) or L_PAREN (start of column list) are met 8204 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8205 privilege_parts.append(self._curr.text.upper()) 8206 self._advance() 8207 8208 this = exp.var(" ".join(privilege_parts)) 8209 expressions = ( 8210 self._parse_wrapped_csv(self._parse_column) 8211 if self._match(TokenType.L_PAREN, advance=False) 8212 else None 8213 ) 8214 8215 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8216 8217 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8218 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8219 principal = self._parse_id_var() 8220 8221 if not principal: 8222 return None 8223 8224 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8225 8226 def 
_parse_grant(self) -> exp.Grant | exp.Command: 8227 start = self._prev 8228 8229 privileges = self._parse_csv(self._parse_grant_privilege) 8230 8231 self._match(TokenType.ON) 8232 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8233 8234 # Attempt to parse the securable e.g. MySQL allows names 8235 # such as "foo.*", "*.*" which are not easily parseable yet 8236 securable = self._try_parse(self._parse_table_parts) 8237 8238 if not securable or not self._match_text_seq("TO"): 8239 return self._parse_as_command(start) 8240 8241 principals = self._parse_csv(self._parse_grant_principal) 8242 8243 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8244 8245 if self._curr: 8246 return self._parse_as_command(start) 8247 8248 return self.expression( 8249 exp.Grant, 8250 privileges=privileges, 8251 kind=kind, 8252 securable=securable, 8253 principals=principals, 8254 grant_option=grant_option, 8255 ) 8256 8257 def _parse_overlay(self) -> exp.Overlay: 8258 return self.expression( 8259 exp.Overlay, 8260 **{ # type: ignore 8261 "this": self._parse_bitwise(), 8262 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8263 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8264 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8265 }, 8266 ) 8267 8268 def _parse_format_name(self) -> exp.Property: 8269 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8270 # for FILE_FORMAT = <format_name> 8271 return self.expression( 8272 exp.Property, 8273 this=exp.var("FORMAT_NAME"), 8274 value=self._parse_string() or self._parse_table_parts(), 8275 ) 8276 8277 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8278 args: t.List[exp.Expression] = [] 8279 8280 if self._match(TokenType.DISTINCT): 8281 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8282 self._match(TokenType.COMMA) 8283 8284 
args.extend(self._parse_csv(self._parse_assignment)) 8285 8286 return self.expression( 8287 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8288 ) 8289 8290 def _identifier_expression( 8291 self, token: t.Optional[Token] = None, **kwargs: t.Any 8292 ) -> exp.Identifier: 8293 token = token or self._prev 8294 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8295 expression.update_positions(token) 8296 return expression
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    """
    Stores the error-handling settings, resolves the dialect and resets the parser.

    Args:
        error_level: The desired error level. A falsy value selects ErrorLevel.IMMEDIATE.
        error_message_context: The number of characters of the query string to capture
            around the offending position when building an error message.
        max_errors: The maximum number of error messages to include in a raised ParseError.
        dialect: The value handed to `Dialect.get_or_raise` to obtain the dialect instance.
    """
    # The module only imports Dialect under TYPE_CHECKING, so the runtime import happens
    # here (presumably to avoid an import cycle -- confirm before moving it).
    from sqlglot.dialects import Dialect

    if not error_level:
        error_level = ErrorLevel.IMMEDIATE

    self.error_level = error_level
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)

    # Start from a clean parsing state.
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    # Looked up on the class, so `_parse` receives the plain (unbound) statement parser.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees produced for the first expression type that parses
        successfully.

    Raises:
        TypeError: If no parser is registered for one of the expression types.
        ParseError: If the tokens could not be parsed into any of the given types. This
            is also raised when no expression type was supplied at all.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Record which target type this failure belongs to. A ParseError may carry
            # no structured entries, in which case there is nothing to annotate and the
            # original failure must not be masked by an IndexError.
            if e.errors:
                e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # `errors` is empty when no expression type was supplied; only chain a cause if
    # there is one instead of failing on `errors[-1]`.
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from (errors[-1] if errors else None)
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees produced for the first expression type that parses successfully.
def check_errors(self) -> None:
    """
    Logs or raises any found errors, depending on the chosen error level setting.

    With ErrorLevel.WARN every recorded error is logged. With ErrorLevel.RAISE a single
    ParseError is raised if at least one error was recorded. Any other level is a no-op.
    """
    level = self.error_level

    if level == ErrorLevel.WARN:
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    if level == ErrorLevel.RAISE and self.errors:
        # The message is built from at most `max_errors` entries, while the merged
        # `errors` payload is built from all of them.
        summary = concat_messages(self.errors, self.max_errors)
        raise ParseError(summary, errors=merge_errors(self.errors))
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Builds a ParseError for `message` and either raises it or records it, depending on
    the chosen error level setting.

    Args:
        message: The description of the problem.
        token: The token the error refers to. When omitted, the current token is used,
            then the previous one, then an empty string token.
    """
    token = token or self._curr or self._prev or Token.string("")

    start = token.start
    end = token.end + 1  # `end` is used as an exclusive slice bound below
    width = self.error_message_context
    sql = self.sql

    # Slice the query into the text before the token, the token itself and the text after.
    start_context = sql[max(start - width, 0) : start]
    highlight = sql[start:end]
    end_context = sql[end : end + width]

    # \033[4m ... \033[0m wraps the offending text in ANSI underline / reset sequences.
    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f" {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Instantiates an Expression, attaches comments to it and validates it.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression. When it is
            missing or empty, `self._add_comments` is called on the new instance instead.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The new expression, as returned by `validate_expression`.
    """
    node = exp_class(**kwargs)

    if comments:
        node.add_comments(comments)
    else:
        self._add_comments(node)

    return self.validate_expression(node)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Reports every validation problem of an Expression through `raise_error`.

    Validation is skipped entirely when the error level is `ErrorLevel.IGNORE`.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The same expression that was passed in.
    """
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for problem in expression.error_messages(args):
        self.raise_error(problem)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """
    Tries to parse a set operation (UNION, EXCEPT or INTERSECT) with `this` as its left operand.

    Args:
        this: The expression on the left-hand side of the set operation.

    Returns:
        The set operation expression, or None if no set operation token follows. In the
        latter case the parser is rewound to the position it had on entry.
    """
    checkpoint = self._index
    _, side_token, kind_token = self._parse_join_parts()

    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        # Undo whatever `_parse_join_parts` consumed.
        self._retreat(checkpoint)
        return None

    set_op_token = self._prev
    comments = set_op_token.comments

    # Any matched token that is neither UNION nor EXCEPT is treated as INTERSECT.
    operation: t.Type[exp.SetOperation] = {
        TokenType.UNION: exp.Union,
        TokenType.EXCEPT: exp.Except,
    }.get(set_op_token.token_type, exp.Intersect)

    distinct: t.Optional[bool]
    if self._match(TokenType.DISTINCT):
        distinct = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        # No explicit modifier: use the dialect's default for this operation.
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
        "STRICT", "CORRESPONDING"
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        if not (side or kind):
            kind = "INNER"

    on_column_list = (
        self._parse_wrapped_csv(self._parse_column)
        if by_name and self._match_texts(("ON", "BY"))
        else None
    )

    right_operand = self._parse_select(nested=True, parse_set_operation=False)

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=right_operand,
        side=side,
        kind=kind,
        on=on_column_list,
    )